谈到Spark存储,第一反应就是RDD里的cache和persist。如果以RDD的cache方法作为入口来看,cache与persist殊途同归,区别无非是persist支持以可配置的storage level作为入参,而cache直接使用默认的MEMORY_ONLY。核心是三个步骤:一是设置RDD内的storageLevel字段,二是在SparkContext中做一下标记,三是注册清理任务。最后一步先不看;仔细跟一下在SparkContext中做标记这个动作,可以看到这个标记除了在调试、测试时用于观察以外,在核心流程中实际上不会被用到,那么“可疑分子”就是RDD内的storageLevel这个属性了。另外,通过代码可以清楚地看到,storageLevel只能被设置一次,换句话说,一旦从NONE改为某个值,就不能再修改,否则会报错。下面的iterator方法正是读取storageLevel的地方:
/**
 * Returns an iterator over the given partition of this RDD.
 *
 * When the RDD's storage level is NONE the partition is computed (or read from a
 * checkpoint) directly; for any other storage level the request is delegated to the
 * cache manager obtained from SparkEnv.
 */
final def iterator(split: Partition, context: TaskContext): Iterator[T] = {
  if (storageLevel == StorageLevel.NONE) {
    // No persistence requested for this RDD.
    computeOrReadCheckpoint(split, context)
  } else {
    // Persistence requested: let the cache manager fetch or compute the partition.
    SparkEnv.get.cacheManager.getOrCompute(this, split, context, storageLevel)
  }
}




/**
 * Gets or computes an RDD partition. Used by RDD.iterator() when an RDD is cached.
 *
 * The partition is looked up in the block manager first. On a miss, the partition's
 * loading lock is acquired, the partition is computed (or read from a checkpoint) and,
 * unless the task is running locally, the result is stored through putInBlockManager.
 */
def getOrCompute[T](
    rdd: RDD[T],
    partition: Partition,
    context: TaskContext,
    storageLevel: StorageLevel): Iterator[T] = {

  val key = RDDBlockId(rdd.id, partition.index)
  logDebug(s"Looking for partition $key")
  blockManager.get(key) match {
    case Some(blockResult) =>
      // The partition is already materialized: record its input metrics and hand back its values
      context.taskMetrics.inputMetrics = Some(blockResult.inputMetrics)
      new InterruptibleIterator(context, blockResult.data.asInstanceOf[Iterator[T]])

    case None =>
      // Acquire a lock for loading this partition.
      // If another thread already holds the lock, wait for it to finish and return its results.
      val storedValues = acquireLockForPartition[T](key)
      if (storedValues.isDefined) {
        new InterruptibleIterator[T](context, storedValues.get)
      } else {
        // Nobody else produced the partition, so we have to load it ourselves
        try {
          logInfo(s"Partition $key not found, computing it")
          val computedValues = rdd.computeOrReadCheckpoint(partition, context)

          if (context.runningLocally) {
            // A task that runs locally does not persist its result
            computedValues
          } else {
            // Cache the values and keep track of any updates in block statuses
            val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]
            val cachedValues = putInBlockManager(key, computedValues, storageLevel, updatedBlocks)
            val metrics = context.taskMetrics
            val lastUpdatedBlocks = metrics.updatedBlocks.getOrElse(Seq[(BlockId, BlockStatus)]())
            metrics.updatedBlocks = Some(lastUpdatedBlocks ++ updatedBlocks.toSeq)
            new InterruptibleIterator(context, cachedValues)
          }
        } finally {
          // Whatever the outcome, stop marking the partition as loading and wake up waiters
          loading.synchronized {
            loading.remove(key)
            loading.notifyAll()
          }
        }
      }
  }
}

先根据传入的RDD ID和对应的partition index得到该分区在BlockManager上对应的key,然后用一个match进行匹配:如果已经缓存,则直接返回用于获取数据的迭代器,具体的hasNext和next方法可以自行查阅;如果BlockManager上没有对应的数据,则先获取该分区的加载锁(若其他线程已经加载完成,就直接返回其结果),再调用computeOrReadCheckpoint进行计算,最后通过putInBlockManager方法把计算结果放入BlockManager。我们最开始说的storageLevel一路被人传来传去,在这里马上就要迎来人生的巅峰了。
/**
 * Cache the values of a partition, keeping track of any updates in the storage statuses of
 * other blocks along the way.
 *
 * The effective storage level is the level that actually drives the BlockManager put, which
 * may differ from the level originally specified by the user. It is mainly used to force a
 * MEMORY_AND_DISK partition to disk when there is not enough room to unroll the partition,
 * while preserving the original semantics of the RDD as specified by the application.
 */
private def putInBlockManager[T](
    key: BlockId,
    values: Iterator[T],
    level: StorageLevel,
    updatedBlocks: ArrayBuffer[(BlockId, BlockStatus)],
    effectiveStorageLevel: Option[StorageLevel] = None): Iterator[T] = {

  val actualLevel = effectiveStorageLevel.getOrElse(level)
  if (actualLevel.useMemory) {
    /*
     * This RDD is to be cached in memory. We cannot hand the computed values to the
     * BlockManager as an iterator and expect to read them back later, because the partition
     * may be dropped from the memory store before we get it back.
     *
     * We must also avoid unrolling the entire partition in memory at once, which could cause
     * an OOM exception if the JVM does not have enough space for this single partition.
     * Instead the values are unrolled cautiously, potentially aborting and dropping the
     * partition to disk if applicable.
     */
    blockManager.memoryStore.unrollSafely(key, values, updatedBlocks) match {
      case Left(unrolled) =>
        // The whole partition was unrolled successfully, so cache it in memory
        updatedBlocks ++=
          blockManager.putArray(key, unrolled, level, tellMaster = true, effectiveStorageLevel)
        unrolled.iterator.asInstanceOf[Iterator[T]]

      case Right(remaining) =>
        // There is not enough space to cache this partition in memory
        logWarning(s"Not enough space to cache partition $key in memory! " +
          s"Free memory is ${blockManager.memoryStore.freeMemory} bytes.")
        val leftover = remaining.asInstanceOf[Iterator[T]]
        if (actualLevel.useDisk) {
          // Retry with a disk-only effective level; the user-specified level stays untouched
          logWarning(s"Persisting partition $key to disk instead.")
          val diskLevel = StorageLevel(useDisk = true, useMemory = false,
            useOffHeap = false, deserialized = false, actualLevel.replication)
          putInBlockManager[T](key, leftover, level, updatedBlocks, Some(diskLevel))
        } else {
          leftover
        }
    }
  } else {
    /*
     * This RDD is not to be cached in memory, so the computed values can be passed to the
     * BlockManager directly as an iterator, without first fully unrolling them in memory.
     */
    updatedBlocks ++=
      blockManager.putIterator(key, values, level, tellMaster = true, effectiveStorageLevel)
    blockManager.get(key) match {
      case None =>
        logInfo(s"Failure to store $key")
        throw new BlockException(key, s"Block manager failed to return cached value for $key!")
      case Some(stored) =>
        stored.data.asInstanceOf[Iterator[T]]
    }
  }
}

/**
 * Puts a block into one of the local stores and, when the put level asks for more than one
 * replica, replicates it as well.
 *
 * The level actually used for the put is `effectiveStorageLevel` when it is defined and
 * `level` otherwise; `level` itself is what gets recorded in the block's BlockInfo.
 *
 * @param blockId id of the block to store; must not be null
 * @param data the block contents, as an iterator, an array or a byte buffer
 * @param level the storage level recorded for the block; must be non-null and valid
 * @param tellMaster whether to report the resulting block status to the master
 * @param effectiveStorageLevel optional override of the level used for the actual put
 * @return the statuses of the blocks updated by this put: blocks dropped from memory (for
 *         in-memory puts) plus the block itself if it ended up stored; empty if the block
 *         already existed on this machine
 */
private def doPut(
    blockId: BlockId,
    data: BlockValues,
    level: StorageLevel,
    tellMaster: Boolean = true,
    effectiveStorageLevel: Option[StorageLevel] = None)
  : Seq[(BlockId, BlockStatus)] = {

  require(blockId != null, "BlockId is null")
  require(level != null && level.isValid, "StorageLevel is null or invalid")
  // Note: the lambda parameter `level` below shadows the method parameter of the same name
  effectiveStorageLevel.foreach { level =>
    require(level != null && level.isValid, "Effective StorageLevel is null or invalid")
  }

  // Return value
  val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)]

  /* Remember the block's storage level so that we can correctly drop it to disk if it needs
   * to be dropped right after it got put into memory. Note, however, that other threads will
   * not be able to get() this block until we call markReady on its BlockInfo. */
  val putBlockInfo = {
    val tinfo = new BlockInfo(level, tellMaster)
    // Do atomically !
    val oldBlockOpt = blockInfo.putIfAbsent(blockId, tinfo)
    if (oldBlockOpt.isDefined) {
      // An entry for this block already exists: wait for it and bail out if it became ready
      if (oldBlockOpt.get.waitForReady()) {
        logWarning(s"Block $blockId already exists on this machine; not re-adding it")
        return updatedBlocks
      }
      // TODO: So the block info exists - but previous attempt to load it (?) failed.
      // What do we do now ? Retry on it ?
      oldBlockOpt.get
    } else {
      tinfo
    }
  }

  val startTimeMs = System.currentTimeMillis

  /* If we're storing values and we need to replicate the data, we'll want access to the values,
   * but because our put will read the whole iterator, there will be no values left. For the
   * case where the put serializes data, we'll remember the bytes, above; but for the case where
   * it doesn't, such as deserialized storage, let's rely on the put returning an Iterator. */
  var valuesAfterPut: Iterator[Any] = null

  // Ditto for the bytes after the put
  var bytesAfterPut: ByteBuffer = null

  // Size of the block in bytes
  var size = 0L

  // The level we actually use to put the block
  val putLevel = effectiveStorageLevel.getOrElse(level)

  // If we're storing bytes, then initiate the replication before storing them locally.
  // This is faster as data is already serialized and ready to send.
  // The future is left as null when no early replication is started.
  val replicationFuture = data match {
    case b: ByteBufferValues if putLevel.replication > 1 =>
      // Duplicate doesn't copy the bytes, but just creates a wrapper
      val bufferView = b.buffer.duplicate()
      Future { replicate(blockId, bufferView, putLevel) }
    case _ => null
  }

  putBlockInfo.synchronized {
    logTrace("Put for block %s took %s to get into synchronized block"
      .format(blockId, Utils.getUsedTimeMs(startTimeMs)))

    // Set to true only once the block has been stored and marked ready; checked in `finally`
    var marked = false
    try {
      // returnValues - Whether to return the values put
      // blockStore - The type of storage to put these values into
      val (returnValues, blockStore: BlockStore) = {
        if (putLevel.useMemory) {
          // Put it in memory first, even if it also has useDisk set to true;
          // We will drop it to disk later if the memory store can't hold it.
          (true, memoryStore)
        } else if (putLevel.useOffHeap) {
          // Use tachyon for off-heap storage
          (false, tachyonStore)
        } else if (putLevel.useDisk) {
          // Don't get back the bytes from put unless we replicate them
          (putLevel.replication > 1, diskStore)
        } else {
          assert(putLevel == StorageLevel.NONE)
          throw new BlockException(
            blockId, s"Attempted to put block $blockId without specifying storage level!")
        }
      }

      // Actually put the values
      val result = data match {
        case IteratorValues(iterator) =>
          blockStore.putIterator(blockId, iterator, putLevel, returnValues)
        case ArrayValues(array) =>
          blockStore.putArray(blockId, array, putLevel, returnValues)
        case ByteBufferValues(bytes) =>
          bytes.rewind()
          blockStore.putBytes(blockId, bytes, putLevel)
      }
      size = result.size
      // Keep whatever the store handed back (values or bytes) for the replication step below
      result.data match {
        case Left (newIterator) if putLevel.useMemory => valuesAfterPut = newIterator
        case Right (newBytes) => bytesAfterPut = newBytes
        case _ =>
      }

      // Keep track of which blocks are dropped from memory
      if (putLevel.useMemory) {
        result.droppedBlocks.foreach { updatedBlocks += _ }
      }

      val putBlockStatus = getCurrentBlockStatus(blockId, putBlockInfo)
      if (putBlockStatus.storageLevel != StorageLevel.NONE) {
        // Now that the block is in either the memory, tachyon, or disk store,
        // let other threads read it, and tell the master about it.
        marked = true
        putBlockInfo.markReady(size)
        if (tellMaster) {
          reportBlockStatus(blockId, putBlockInfo, putBlockStatus)
        }
        updatedBlocks += ((blockId, putBlockStatus))
      }
    } finally {
      // If we failed in putting the block to memory/disk, notify other possible readers
      // that it has failed, and then remove it from the block info map.
      if (!marked) {
        // Note that the remove must happen before markFailure otherwise another thread
        // could've inserted a new BlockInfo before we remove it.
        blockInfo.remove(blockId)
        putBlockInfo.markFailure()
        logWarning(s"Putting block $blockId failed")
      }
    }
  }
  logDebug("Put block %s locally took %s".format(blockId, Utils.getUsedTimeMs(startTimeMs)))

  // Either we're storing bytes and we asynchronously started replication, or we're storing
  // values and need to serialize and replicate them now:
  if (putLevel.replication > 1) {
    data match {
      case ByteBufferValues(bytes) =>
        // Replication was started before the local put; just wait for it to complete
        if (replicationFuture != null) {
          Await.ready(replicationFuture, Duration.Inf)
        }
      case _ =>
        val remoteStartTime = System.currentTimeMillis
        // Serialize the block if not already done
        if (bytesAfterPut == null) {
          if (valuesAfterPut == null) {
            throw new SparkException(
              "Underlying put returned neither an Iterator nor bytes! This shouldn't happen.")
          }
          bytesAfterPut = dataSerialize(blockId, valuesAfterPut)
        }
        replicate(blockId, bytesAfterPut, putLevel)
        logDebug("Put block %s remotely took %s"
          .format(blockId, Utils.getUsedTimeMs(remoteStartTime)))
    }
  }

  // NOTE(review): bytesAfterPut may still be null here — presumably dispose tolerates that; confirm
  BlockManager.dispose(bytesAfterPut)

  if (putLevel.replication > 1) {
    logDebug("Putting block %s with replication took %s"
      .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
  } else {
    logDebug("Putting block %s without replication took %s"
      .format(blockId, Utils.getUsedTimeMs(startTimeMs)))
  }

  updatedBlocks
}


