Spark Core 1.6.0 Source Code Analysis (10): Task Execution


org.apache.spark.executor.Executor

Below is the run method of its inner class TaskRunner:

override def run(): Unit = {
  val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)
  val deserializeStartTime = System.currentTimeMillis()
  Thread.currentThread.setContextClassLoader(replClassLoader)
  val ser = env.closureSerializer.newInstance()
  logInfo(s"Running $taskName (TID $taskId)")
  // Send a StatusUpdate to the driver: the task state is now RUNNING
  execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
  var taskStart: Long = 0
  startGCTime = computeTotalGcTime()
  try {
    // Split serializedTask into the files, jars and task bytes it carries
    val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask)
    // Download the jars, files, etc. that the task needs
    updateDependencies(taskFiles, taskJars)
    // Deserialize the actual Task object
    task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)
    task.setTaskMemoryManager(taskMemoryManager)

    // If this task has been killed before we deserialized it, let's quit now. Otherwise,
    // continue executing the task.
    if (killed) {
      // Throw an exception rather than returning, because returning within a try{} block
      // causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
      // exception will be caught by the catch block, leading to an incorrect ExceptionFailure
      // for the task.
      throw new TaskKilledException
    }

    logDebug("Task " + taskId + "'s epoch is " + task.epoch)
    env.mapOutputTracker.updateEpoch(task.epoch)

    // Run the actual task and measure its runtime.
    taskStart = System.currentTimeMillis()
    var threwException = true
    val (value, accumUpdates) = try {
      // Execute the task itself
      val res = task.run(
        taskAttemptId = taskId,
        attemptNumber = attemptNumber,
        metricsSystem = env.metricsSystem)
      threwException = false
      res
    } finally {
      // Release all managed memory allocated by this task
      val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()
      if (freedMemory > 0) {
        val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"
        if (conf.getBoolean("spark.unsafe.exceptionOnMemoryLeak", false) && !threwException) {
          throw new SparkException(errMsg)
        } else {
          logError(errMsg)
        }
      }
    }
    val taskFinish = System.currentTimeMillis()

    // If the task has been killed, let's fail it.
    if (task.killed) {
      throw new TaskKilledException
    }

    val resultSer = env.serializer.newInstance()
    val beforeSerialization = System.currentTimeMillis()
    // Serialize the task's result value
    val valueBytes = resultSer.serialize(value)
    val afterSerialization = System.currentTimeMillis()

    for (m <- task.metrics) {
      // Deserialization happens in two parts: first, we deserialize a Task object, which
      // includes the Partition. Second, Task.run() deserializes the RDD and function to be run.
      m.setExecutorDeserializeTime(
        (taskStart - deserializeStartTime) + task.executorDeserializeTime)
      // We need to subtract Task.run()'s deserialization time to avoid double-counting
      m.setExecutorRunTime((taskFinish - taskStart) - task.executorDeserializeTime)
      m.setJvmGCTime(computeTotalGcTime() - startGCTime)
      m.setResultSerializationTime(afterSerialization - beforeSerialization)
      m.updateAccumulators()
    }

    // The final result is wrapped in a DirectTaskResult and serialized; how it is sent
    // back to the driver depends on the serialized size
    val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull)
    val serializedDirectResult = ser.serialize(directResult)
    val resultSize = serializedDirectResult.limit

    // directSend = sending directly back to the driver
    val serializedResult: ByteBuffer = {
      if (maxResultSize > 0 && resultSize > maxResultSize) {
        // Serialized result exceeds spark.driver.maxResultSize (1 GB by default):
        // drop the result and only report its size
        logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
          s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
          s"dropping it.")
        ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
      } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
        // Serialized result exceeds the Akka frame size (spark.akka.frameSize): store the
        // bytes as a block in the BlockManager, then put the returned block id into an
        // IndirectTaskResult
        val blockId = TaskResultBlockId(taskId)
        env.blockManager.putBytes(
          blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)
        logInfo(
          s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
        ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
      } else {
        // Small results are sent straight back to the driver
        logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")
        serializedDirectResult
      }
    }

    execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
  } catch {
    case ffe: FetchFailedException =>
      val reason = ffe.toTaskEndReason
      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case _: TaskKilledException | _: InterruptedException if task.killed =>
      logInfo(s"Executor killed $taskName (TID $taskId)")
      execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))

    case cDE: CommitDeniedException =>
      val reason = cDE.toTaskEndReason
      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case t: Throwable =>
      // Attempt to exit cleanly by informing the driver of our failure.
      // If anything goes wrong (or this was a fatal exception), we will delegate to
      // the default uncaught exception handler, which will terminate the Executor.
      logError(s"Exception in $taskName (TID $taskId)", t)

      val metrics: Option[TaskMetrics] = Option(task).flatMap { task =>
        task.metrics.map { m =>
          m.setExecutorRunTime(System.currentTimeMillis() - taskStart)
          m.setJvmGCTime(computeTotalGcTime() - startGCTime)
          m.updateAccumulators()
          m
        }
      }
      val serializedTaskEndReason = {
        try {
          ser.serialize(new ExceptionFailure(t, metrics))
        } catch {
          case _: NotSerializableException =>
            // t is not serializable so just send the stacktrace
            ser.serialize(new ExceptionFailure(t, metrics, false))
        }
      }
      execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskEndReason)

      // Don't forcibly exit unless the exception was inherently fatal, to avoid
      // stopping other tasks unnecessarily.
      if (Utils.isFatalError(t)) {
        SparkUncaughtExceptionHandler.uncaughtException(t)
      }
  } finally {
    runningTasks.remove(taskId)
  }
}

Task.run

final def run(
    taskAttemptId: Long,
    attemptNumber: Int,
    metricsSystem: MetricsSystem)
  : (T, AccumulatorUpdates) = {
  context = new TaskContextImpl(
    stageId,
    partitionId,
    taskAttemptId,
    attemptNumber,
    taskMemoryManager,
    metricsSystem,
    internalAccumulators,
    runningLocally = false)
  TaskContext.setTaskContext(context)
  context.taskMetrics.setHostname(Utils.localHostName())
  context.taskMetrics.setAccumulatorsUpdater(context.collectInternalAccumulators)
  taskThread = Thread.currentThread()
  if (_killed) {
    kill(interruptThread = false)
  }
  try {
    (runTask(context), context.collectAccumulators())
  } finally {
    context.markTaskCompleted()
    try {
      Utils.tryLogNonFatalError {
        // Release memory used by this thread for unrolling blocks
        SparkEnv.get.blockManager.memoryStore.releaseUnrollMemoryForThisTask()
        // Notify any tasks waiting for execution memory to be freed to wake up and try to
        // acquire memory again. This makes impossible the scenario where a task sleeps forever
        // because there are no other tasks left to notify it. Since this is safe to do but may
        // not be strictly necessary, we should revisit whether we can remove this in the future.
        val memoryManager = SparkEnv.get.memoryManager
        memoryManager.synchronized { memoryManager.notifyAll() }
      }
    } finally {
      TaskContext.unset()
    }
  }
}
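
Note that Task.run installs the context into a thread-local via TaskContext.setTaskContext before invoking runTask, which is why user code executing inside a task can read it with TaskContext.get(). A small illustrative sketch (someRdd is just a placeholder for any RDD):

// Inside a task, TaskContext.get() returns the context created by Task.run above.
val tagged = someRdd.mapPartitions { iter =>
  val ctx = org.apache.spark.TaskContext.get()
  iter.map(x => (ctx.stageId(), ctx.partitionId(), x))
}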

Next, let's look at reduceByKey, which is defined in PairRDDFunctions:


/**
 * Merge the values for each key using an associative reduce function. This will also perform
 * the merging locally on each mapper before sending results to a reducer, similarly to a
 * "combiner" in MapReduce.
 */
def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = self.withScope {
  combineByKeyWithClassTag[V]((v: V) => v, func, func, partitioner)
}

/**
 * Merge the values for each key using an associative reduce function. This will also perform
 * the merging locally on each mapper before sending results to a reducer, similarly to a
 * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions.
 */
def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = self.withScope {
  reduceByKey(new HashPartitioner(numPartitions), func)
}

/**
 * Merge the values for each key using an associative reduce function. This will also perform
 * the merging locally on each mapper before sending results to a reducer, similarly to a
 * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/
 * parallelism level.
 */
def reduceByKey(func: (V, V) => V): RDD[(K, V)] = self.withScope {
  reduceByKey(defaultPartitioner(self), func)
}

There are three different reduceByKey overloads. We can set the number of reduce partitions by hand; if we don't, the number is decided for us.
If the number of reduce partitions is not specified, the rules are as follows (see the sketch after the list):
1. If a custom partitioner is passed in, that partitioner is used as-is.
2. If no partitioner is given but a reduce count is, the default partitioner is a HashPartitioner built from that count.
3. If neither is set, defaultPartitioner first reuses an existing partitioner from a parent RDD if one is present; otherwise it builds a HashPartitioner from spark.default.parallelism if that is set, or from the partition count of the largest upstream RDD (bySize.head.partitions.size).
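
A minimal usage sketch of the three cases (assuming an existing SparkContext named sc; the RDD and value names are made up):

import org.apache.spark.HashPartitioner

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

// Rule 1: an explicit partitioner is used as-is.
val r1 = pairs.reduceByKey(new HashPartitioner(4), _ + _)

// Rule 2: an explicit partition count becomes HashPartitioner(numPartitions).
val r2 = pairs.reduceByKey(_ + _, 8)

// Rule 3: neither is given, so defaultPartitioner(self) decides: reuse a parent's
// partitioner if one exists, otherwise HashPartitioner(spark.default.parallelism),
// or, failing that, the largest parent RDD's partition count.
val r3 = pairs.reduceByKey(_ + _)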

reduceByKey is only a transformation; the real work happens once an action triggers the job, as the snippet below shows.
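
Continuing with the hypothetical pairs RDD from the sketch above:

// reduceByKey only builds a ShuffledRDD and its shuffle dependency in the DAG;
// no job is submitted yet.
val counts = pairs.reduceByKey(_ + _)

// An action such as collect() submits the job; that is when the ShuffleMapTask /
// ResultTask pipeline described in this post actually runs on the executors.
val result = counts.collect()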


Two other important task types remain, ShuffleMapTask and ResultTask; their runTask methods are worth a look when time allows.
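
For orientation, here is a heavily simplified sketch of what those two overrides do in the 1.6 sources (metrics bookkeeping and error handling are omitted; taskBinary, partition and partitionId are fields of the enclosing task classes, so this is an excerpt-style sketch rather than compilable code):

// ShuffleMapTask: compute this partition of the RDD and feed the records to the
// shuffle writer; the returned MapStatus tells the driver where the map output lives.
override def runTask(context: TaskContext): MapStatus = {
  val ser = SparkEnv.get.closureSerializer.newInstance()
  val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
    ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
  val writer = SparkEnv.get.shuffleManager
    .getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
  writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
  writer.stop(success = true).get
}

// ResultTask: compute this partition and apply the user function (e.g. the one behind
// collect()) to the iterator; the return value becomes the task result handled above.
override def runTask(context: TaskContext): U = {
  val ser = SparkEnv.get.closureSerializer.newInstance()
  val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
    ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
  func(context, rdd.iterator(partition, context))
}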

