Spark Core 2.0: the UnifiedMemoryManager

来源:互联网 发布:天谕男角色捏脸数据 编辑:程序博客网 时间:2024/05/16 12:59


// NOTE(review): this is a truncated duplicate of the companion object that appears in full
// later in this page. As pasted here it references `getMaxMemory`, which is only defined in
// the later, complete copy, and the object is missing its closing `}` — this fragment does
// not compile on its own. The reserved 300MB below is subtracted from the heap before the
// unified (execution + storage) region is sized by `spark.memory.fraction`.
object UnifiedMemoryManager {  // Set aside a fixed amount of memory for non-storage, non-execution purposes.  // This serves a function similar to `spark.memory.fraction`, but guarantees that we reserve  // sufficient memory for the system even for small heaps. E.g. if we have a 1GB JVM, then  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 434MB by default.  private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024  def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {    val maxMemory = getMaxMemory(conf)    new UnifiedMemoryManager(      conf,      maxHeapMemory = maxMemory,      onHeapStorageRegionSize =        (maxMemory * conf.getDouble("spark.memory.storageFraction", 0.5)).toLong,      numCores = numCores)  }



// Constructor/extends header of UnifiedMemoryManager. It passes MemoryManager a storage
// region of `onHeapStorageRegionSize` bytes and an execution region of
// `maxHeapMemory - onHeapStorageRegionSize` bytes — i.e. the two regions exactly partition
// the unified heap budget.
// NOTE(review): only the header is shown on this line; the class body continues further
// down the page, interleaved with other excerpts.
private[spark] class UnifiedMemoryManager private[memory] (    conf: SparkConf,    val maxHeapMemory: Long,    onHeapStorageRegionSize: Long,    numCores: Int)  extends MemoryManager(    conf,    numCores,    onHeapStorageRegionSize,    maxHeapMemory - onHeapStorageRegionSize) {

// Header of the abstract base class. Its constructor receives the storage and execution
// region sizes that UnifiedMemoryManager computes in its extends clause above.
// NOTE(review): header only — the body of this abstract class is not included in this
// excerpt, so this line does not compile standalone.
private[spark] abstract class MemoryManager(    conf: SparkConf,    numCores: Int,    onHeapStorageMemory: Long,    onHeapExecutionMemory: Long) extends Logging {

/**
 * Try to acquire up to `numBytes` of execution memory for the current task and return the
 * number of bytes obtained, or 0 if none can be allocated.
 *
 * This call may block until there is enough free memory in some situations, to make sure each
 * task has a chance to ramp up to at least 1 / 2N of the total memory pool (where N is the # of
 * active tasks) before it is forced to spill. This can happen if the number of tasks increase
 * but an older task had a lot of memory already.
 */
override private[memory] def acquireExecutionMemory(
    numBytes: Long,
    taskAttemptId: Long,
    memoryMode: MemoryMode): Long = synchronized {
  assertInvariants()
  assert(numBytes >= 0)

  // Pick the pools and sizing limits matching the requested memory mode.
  val (executionPool, storagePool, storageRegionSize, maxMemory) = memoryMode match {
    case MemoryMode.ON_HEAP =>
      (onHeapExecutionMemoryPool,
        onHeapStorageMemoryPool,
        onHeapStorageRegionSize,
        maxHeapMemory)
    case MemoryMode.OFF_HEAP =>
      (offHeapExecutionMemoryPool,
        offHeapStorageMemoryPool,
        offHeapStorageMemory,
        maxOffHeapMemory)
  }

  /**
   * Grow the execution pool at storage's expense: reclaim any free storage memory, and,
   * when storage has grown past `storageRegionSize`, evict cached blocks to take back the
   * memory storage borrowed from execution. Invoked once per acquisition attempt, since
   * another task may cache a large block between attempts.
   */
  def maybeGrowExecutionPool(extraMemoryNeeded: Long): Unit = {
    if (extraMemoryNeeded > 0) {
      // Reclaimable = free storage memory + anything storage holds beyond its own region.
      val memoryReclaimableFromStorage = math.max(
        storagePool.memoryFree,
        storagePool.poolSize - storageRegionSize)
      if (memoryReclaimableFromStorage > 0) {
        // Only reclaim as much space as is necessary and available:
        val spaceToReclaim = storagePool.freeSpaceToShrinkPool(
          math.min(extraMemoryNeeded, memoryReclaimableFromStorage))
        storagePool.decrementPoolSize(spaceToReclaim)
        executionPool.incrementPoolSize(spaceToReclaim)
      }
    }
  }

  /**
   * The size the execution pool would have after evicting storage memory. The execution
   * pool divides this evenly among active tasks to cap per-task allocation. Keeping it
   * above the current pool size avoids SPARK-12155; keeping it below `maxMemory` stops a
   * task from counting unevictable storage memory as if other tasks could reclaim it.
   */
  def computeMaxExecutionPoolSize(): Long = {
    maxMemory - math.min(storagePool.memoryUsed, storageRegionSize)
  }

  executionPool.acquireMemory(
    numBytes, taskAttemptId, maybeGrowExecutionPool, computeMaxExecutionPoolSize)
}

 /**
  * Acquire `numBytes` of storage memory for `blockId` in the given mode, borrowing free
  * execution memory when the storage pool alone cannot satisfy the request.
  *
  * @return whether the storage pool granted the request.
  */
 override def acquireStorageMemory(
     blockId: BlockId,
     numBytes: Long,
     memoryMode: MemoryMode): Boolean = synchronized {
   assertInvariants()
   assert(numBytes >= 0)
   val (executionPool, storagePool, maxMemory) = memoryMode match {
     case MemoryMode.ON_HEAP =>
       (onHeapExecutionMemoryPool, onHeapStorageMemoryPool, maxOnHeapStorageMemory)
     case MemoryMode.OFF_HEAP =>
       (offHeapExecutionMemoryPool, offHeapStorageMemoryPool, maxOffHeapMemory)
   }
   if (numBytes > maxMemory) {
     // Fail fast if the block simply won't fit
     logInfo(s"Will not store $blockId as the required space ($numBytes bytes) exceeds our " +
       s"memory limit ($maxMemory bytes)")
     false
   } else {
     if (numBytes > storagePool.memoryFree) {
       // Storage is short on free memory: take as much free execution memory as is
       // needed (capped by what execution actually has free) before acquiring.
       val memoryBorrowedFromExecution = Math.min(executionPool.memoryFree, numBytes)
       executionPool.decrementPoolSize(memoryBorrowedFromExecution)
       storagePool.incrementPoolSize(memoryBorrowedFromExecution)
     }
     storagePool.acquireMemory(blockId, numBytes)
   }
 }

  /**
   * Acquire memory for unrolling `blockId`. Delegates directly to
   * [[acquireStorageMemory]] with the same arguments — unroll requests are
   * served from the same pool as ordinary storage requests.
   */
  override def acquireUnrollMemory(
      blockId: BlockId,
      numBytes: Long,
      memoryMode: MemoryMode): Boolean = synchronized {
    acquireStorageMemory(blockId, numBytes, memoryMode)
  }
}


object UnifiedMemoryManager {

  // Set aside a fixed amount of memory for non-storage, non-execution purposes.
  // This serves a function similar to `spark.memory.fraction`, but guarantees that we reserve
  // sufficient memory for the system even for small heaps. E.g. if we have a 1GB JVM, then
  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 434MB by default.
  private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024

  /**
   * Build a [[UnifiedMemoryManager]]: the usable memory computed by `getMaxMemory` is split
   * between storage and execution by `spark.memory.storageFraction` (default 0.5).
   */
  def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
    val maxMemory = getMaxMemory(conf)
    new UnifiedMemoryManager(
      conf,
      maxHeapMemory = maxMemory,
      onHeapStorageRegionSize =
        (maxMemory * conf.getDouble("spark.memory.storageFraction", 0.5)).toLong,
      numCores = numCores)
  }

  /**
   * Return the total amount of memory shared between execution and storage, in bytes.
   *
   * @throws IllegalArgumentException if the JVM heap (or configured executor memory) is
   *         smaller than 1.5x the reserved system memory.
   */
  private def getMaxMemory(conf: SparkConf): Long = {
    val systemMemory = conf.getLong("spark.testing.memory", Runtime.getRuntime.maxMemory)
    // Tests may zero out the reservation via spark.testing / spark.testing.reservedMemory.
    val reservedMemory = conf.getLong("spark.testing.reservedMemory",
      if (conf.contains("spark.testing")) 0 else RESERVED_SYSTEM_MEMORY_BYTES)
    val minSystemMemory = (reservedMemory * 1.5).ceil.toLong
    if (systemMemory < minSystemMemory) {
      throw new IllegalArgumentException(s"System memory $systemMemory must " +
        s"be at least $minSystemMemory. Please increase heap size using the --driver-memory " +
        s"option or spark.driver.memory in Spark configuration.")
    }
    // SPARK-12759 Check executor memory to fail fast if memory is insufficient
    if (conf.contains("spark.executor.memory")) {
      val executorMemory = conf.getSizeAsBytes("spark.executor.memory")
      if (executorMemory < minSystemMemory) {
        // Fixed here: the pasted source had a hard line break inside this string literal
        // (a syntax error); the literal is rejoined onto a single line.
        throw new IllegalArgumentException(s"Executor memory $executorMemory must be at least " +
          s"$minSystemMemory. Please increase executor memory using the " +
          s"--executor-memory option or spark.executor.memory in Spark configuration.")
      }
    }
    val usableMemory = systemMemory - reservedMemory
    val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
    (usableMemory * memoryFraction).toLong
  }
}



0 0