Spark Source Code Walkthrough: Job Submission


The previous article walked through how RDDs are constructed and transformed. All of the RDD operations there were transformations: each one produces a new RDD and performs no real computation. What actually triggers computation in Spark is an action, such as first, count, or collect.

Calling WordCounts.collect() is what actually triggers the execution of a Job.
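For example (a minimal sketch, assuming an existing SparkContext named sc and a hypothetical input path), the transformations below only build up the RDD lineage; no Job is submitted until collect() is invoked:

// Minimal sketch: transformations are lazy; only the action submits a Job.
// `sc` and the input path are assumptions made for illustration.
val lines = sc.textFile("hdfs:///tmp/input.txt")      // transformation: nothing runs yet
val wordCounts = lines
  .flatMap(_.split(" "))                              // transformation
  .map(word => (word, 1))                             // transformation
  .reduceByKey(_ + _)                                 // transformation (adds a shuffle dependency)

val result = wordCounts.collect()                     // action: this line triggers SparkContext.runJob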


Call flow:

org.apache.spark.SparkContext.runJob

org.apache.spark.scheduler.DAGScheduler.runJob

org.apache.spark.scheduler.DAGScheduler.submitJob

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.post

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.run

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive

org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive

org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted


The source of RDD.collect is as follows:

  // Return all of the elements contained in this RDD
  def collect(): Array[T] = withScope {
    val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
    Array.concat(results: _*)
  }
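Note that collect simply runs iter.toArray on every partition and concatenates the per-partition arrays on the driver. The same runJob entry point can be used directly with your own per-partition function. A hedged sketch, assuming a live SparkContext sc:

// Sketch: call sc.runJob the same way collect does, but with a custom per-partition function.
val rdd = sc.parallelize(1 to 100, 4)
// runJob returns one result element per partition; here we count the records in each partition.
val recordsPerPartition: Array[Int] = sc.runJob(rdd, (iter: Iterator[Int]) => iter.size)
// recordsPerPartition: e.g. Array(25, 25, 25, 25)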

Internally it simply calls SparkContext.runJob. runJob is heavily overloaded in SparkContext, and the call chains from the first overload down to the final one, whose source is shown below:

  /**
   * rdd is the ShuffledRDD produced by the final transformation
   * func is (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), where cleanedFunc = clean(func)
   * partitions is 0 until rdd.partitions.length
   * resultHandler is (index, res) => results(index) = res, where results = new Array[U](partitions.size)
   */
  def runJob[T, U: ClassTag](
      rdd: RDD[T],
      func: (TaskContext, Iterator[T]) => U,
      partitions: Seq[Int],
      resultHandler: (Int, U) => Unit): Unit = {
    if (stopped.get()) {
      throw new IllegalStateException("SparkContext has been shutdown")
    }
    val callSite = getCallSite
    val cleanedFunc = clean(func)
    logInfo("Starting job: " + callSite.shortForm)
    if (conf.getBoolean("spark.logLineage", false)) {
      logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString)
    }
    // Delegate the job to DAGScheduler.runJob
    dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get)
    progressBar.foreach(_.finishAll())
    // Checkpoint the RDD (and its ancestors) if checkpointing was requested
    rdd.doCheckpoint()
  }
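A small practical aside on the spark.logLineage branch above: when that flag is set, each job submission logs rdd.toDebugString, which you can also print yourself. A hedged usage sketch (the application name is hypothetical):

import org.apache.spark.SparkConf

// Sketch: turn on lineage logging (before the SparkContext is created), or inspect it manually.
val conf = new SparkConf()
  .setAppName("lineage-demo")            // hypothetical application name
  .set("spark.logLineage", "true")       // runJob will now log the RDD's recursive dependencies

// Equivalent manual inspection at any point:
// println(wordCounts.toDebugString)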

DAGScheduler.runJob then submits the job through submitJob; the source is as follows:

  def runJob[T, U](
      rdd: RDD[T],
      func: (TaskContext, Iterator[T]) => U,
      partitions: Seq[Int],
      callSite: CallSite,
      resultHandler: (Int, U) => Unit,
      properties: Properties): Unit = {
    val start = System.nanoTime
    // Call DAGScheduler.submitJob, which returns a JobWaiter; the waiter waits for the job
    // to complete and then invokes resultHandler for post-processing
    val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
    waiter.awaitResult() match {
      // Success
      case JobSucceeded =>
        logInfo("Job %d finished: %s, took %f s".format
          (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
      // Failure
      case JobFailed(exception: Exception) =>
        logInfo("Job %d failed: %s, took %f s".format
          (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
        // SPARK-8644: Include user stack trace in exceptions coming from DAGScheduler.
        val callerStackTrace = Thread.currentThread().getStackTrace.tail
        exception.setStackTrace(exception.getStackTrace ++ callerStackTrace)
        throw exception
    }
  }
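The key design here is that submitJob is asynchronous, while runJob blocks on the returned JobWaiter. The sketch below is a simplified, self-contained re-creation of that waiter pattern (it is not Spark's actual JobWaiter): the submitting thread blocks on a future that the scheduler thread completes once every task result has been handled.

import scala.concurrent.{Await, Promise}
import scala.concurrent.duration.Duration
import scala.util.Try

// Simplified waiter: one counter per job, a promise completed when all tasks are done.
class SimpleJobWaiter[U](totalTasks: Int, resultHandler: (Int, U) => Unit) {
  private val promise = Promise[Unit]()
  private var finishedTasks = 0

  // Called on the scheduler side for each finished task.
  def taskSucceeded(index: Int, result: U): Unit = synchronized {
    resultHandler(index, result)
    finishedTasks += 1
    if (finishedTasks == totalTasks) promise.trySuccess(())
  }

  // Called on the scheduler side if the job fails.
  def jobFailed(e: Exception): Unit = promise.tryFailure(e)

  // Called on the submitting side; blocks until success or failure.
  def awaitResult(): Try[Unit] = Try(Await.result(promise.future, Duration.Inf))
}

In the Spark version shown here, the real JobWaiter.awaitResult returns a JobResult (JobSucceeded or JobFailed) rather than a Try, but the blocking contract is the same.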

The source of submitJob is as follows:

  def submitJob[T, U](
      rdd: RDD[T],
      func: (TaskContext, Iterator[T]) => U,
      partitions: Seq[Int],
      callSite: CallSite,
      resultHandler: (Int, U) => Unit,
      properties: Properties): JobWaiter[U] = {
    // Check to make sure we are not launching a task on a partition that does not exist.
    val maxPartitions = rdd.partitions.length
    partitions.find(p => p >= maxPartitions || p < 0).foreach { p =>
      throw new IllegalArgumentException(
        "Attempting to access a non-existent partition: " + p + ". " +
          "Total number of partitions: " + maxPartitions)
    }

    // Unique Job id
    val jobId = nextJobId.getAndIncrement()
    if (partitions.size == 0) {
      // Return immediately if the job is running 0 tasks
      return new JobWaiter[U](this, jobId, 0, resultHandler)
    }

    assert(partitions.size > 0)
    val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
    // Create a JobWaiter, which blocks until the job either finishes or is cancelled
    val waiter = new JobWaiter(this, jobId, partitions.size, resultHandler)
    // eventProcessLoop is an instance of DAGSchedulerEventProcessLoop; post enqueues a
    // JobSubmitted event, which the background eventThread will pick up and process
    eventProcessLoop.post(JobSubmitted(
      jobId, rdd, func2, partitions.toArray, callSite, waiter,
      SerializationUtils.clone(properties)))
    waiter
  }
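The partition check at the top of submitJob is easy to observe from user code. A hedged example, assuming a live SparkContext sc:

// Sketch: requesting a partition index outside [0, rdd.partitions.length) fails fast,
// before any event is posted to the scheduler.
val rdd = sc.parallelize(1 to 10, 2)                               // partitions 0 and 1
val sums = sc.runJob(rdd, (it: Iterator[Int]) => it.sum, Seq(0))   // fine: one result, for partition 0
// sc.runJob(rdd, (it: Iterator[Int]) => it.sum, Seq(5))
//   => IllegalArgumentException: Attempting to access a non-existent partition: 5. ...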

DAGSchedulerEventProcessLoop extends EventLoop, so let's first look at the EventLoop class:

/**
 * An event loop to receive events from the caller and process all events in the event thread. It
 * will start an exclusive event thread to process all events.
 *
 * Note: The event queue will grow indefinitely. So subclasses should make sure `onReceive` can
 * handle events in time to avoid the potential OOM.
 */
private[spark] abstract class EventLoop[E](name: String) extends Logging {

  private val eventQueue: BlockingQueue[E] = new LinkedBlockingDeque[E]()

  private val stopped = new AtomicBoolean(false)

  private val eventThread = new Thread(name) {
    setDaemon(true)

    override def run(): Unit = {
      try {
        while (!stopped.get) {
          val event = eventQueue.take()
          try {
            onReceive(event) // abstract method, implemented by subclasses
          } catch {
            case NonFatal(e) => {
              try {
                onError(e)
              } catch {
                case NonFatal(e) => logError("Unexpected error in " + name, e)
              }
            }
          }
        }
      } catch {
        case ie: InterruptedException => // exit even if eventQueue is not empty
        case NonFatal(e) => logError("Unexpected error in " + name, e)
      }
    }
  }

  // Some methods omitted

  /**
   * Put the event into the event queue. The event thread will process it later.
   */
  def post(event: E): Unit = {
    eventQueue.put(event)
  }

  /**
   * Invoked in the event thread when polling events from the event queue.
   *
   * Note: Should avoid calling blocking actions in `onReceive`, or the event thread will be blocked
   * and cannot process events in time. If you want to call some blocking actions, run them in
   * another thread.
   */
  protected def onReceive(event: E): Unit

}
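Since EventLoop is private[spark], it cannot be subclassed from user code, but its mechanics are simple enough to re-create. The toy loop below (a standalone sketch, not Spark code) mirrors the same structure: post puts events onto a blocking queue, and a single daemon thread drains the queue and dispatches each event to onReceive.

import java.util.concurrent.{BlockingQueue, LinkedBlockingDeque}
import java.util.concurrent.atomic.AtomicBoolean

// Toy re-creation of the EventLoop structure: one queue, one daemon consumer thread.
abstract class ToyEventLoop[E](name: String) {
  private val eventQueue: BlockingQueue[E] = new LinkedBlockingDeque[E]()
  private val stopped = new AtomicBoolean(false)

  private val eventThread = new Thread(name) {
    setDaemon(true)
    override def run(): Unit = {
      try {
        while (!stopped.get) {
          val event = eventQueue.take()                    // blocks until an event is posted
          try onReceive(event) catch { case e: Exception => onError(e) }
        }
      } catch {
        case _: InterruptedException =>                    // stop() interrupts the blocked take()
      }
    }
  }

  def start(): Unit = eventThread.start()
  def stop(): Unit = { stopped.set(true); eventThread.interrupt() }
  def post(event: E): Unit = eventQueue.put(event)          // may be called from any thread

  protected def onReceive(event: E): Unit                   // dispatch logic supplied by subclasses
  protected def onError(e: Throwable): Unit = e.printStackTrace()
}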

The EventLoop source shows that EventLoop owns a background daemon thread, eventThread, which takes events off the queue and calls onReceive on each of them, while post simply puts an event onto the queue. Since onReceive is abstract, the concrete behavior is defined by the subclass; part of the DAGSchedulerEventProcessLoop source is shown below:

private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler)
  extends EventLoop[DAGSchedulerEvent]("dag-scheduler-event-loop") with Logging {

  private[this] val timer = dagScheduler.metricsSource.messageProcessingTimer

  /**
   * The main event loop of the DAG scheduler.
   * After an event is posted, the eventThread calls this method to process it.
   */
  override def onReceive(event: DAGSchedulerEvent): Unit = {
    val timerContext = timer.time()
    try {
      doOnReceive(event)
    } finally {
      timerContext.stop()
    }
  }

  private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
    case JobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties) =>
      dagScheduler.handleJobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties)

    case MapStageSubmitted(jobId, dependency, callSite, listener, properties) =>
      dagScheduler.handleMapStageSubmitted(jobId, dependency, callSite, listener, properties)

    case StageCancelled(stageId) =>
      dagScheduler.handleStageCancellation(stageId)

    case JobCancelled(jobId) =>
      dagScheduler.handleJobCancellation(jobId)

    case JobGroupCancelled(groupId) =>
      dagScheduler.handleJobGroupCancelled(groupId)

    case AllJobsCancelled =>
      dagScheduler.doCancelAllJobs()

    case ExecutorAdded(execId, host) =>
      dagScheduler.handleExecutorAdded(execId, host)

    case ExecutorLost(execId) =>
      dagScheduler.handleExecutorLost(execId, fetchFailed = false)

    case BeginEvent(task, taskInfo) =>
      dagScheduler.handleBeginEvent(task, taskInfo)

    case GettingResultEvent(taskInfo) =>
      dagScheduler.handleGetTaskResult(taskInfo)

    case completion @ CompletionEvent(task, reason, _, _, taskInfo, taskMetrics) =>
      dagScheduler.handleTaskCompletion(completion)

    case TaskSetFailed(taskSet, reason, exception) =>
      dagScheduler.handleTaskSetFailed(taskSet, reason, exception)

    case ResubmitFailedStages =>
      dagScheduler.resubmitFailedStages()
  }
}
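The doOnReceive dispatch is a plain pattern match over a family of event case classes, one case per handler on the DAGScheduler. The toy dispatcher below (an illustration with made-up event types, not Spark code) shows the same style:

// Toy illustration of the doOnReceive dispatch style: sealed events, one match, one handler each.
sealed trait ToyDagEvent
case class ToyJobSubmitted(jobId: Int) extends ToyDagEvent
case class ToyJobCancelled(jobId: Int) extends ToyDagEvent
case object ToyAllJobsCancelled extends ToyDagEvent

def doOnReceive(event: ToyDagEvent): Unit = event match {
  case ToyJobSubmitted(id) => println(s"handleJobSubmitted($id)")    // would call the matching handler
  case ToyJobCancelled(id) => println(s"handleJobCancellation($id)")
  case ToyAllJobsCancelled => println("doCancelAllJobs()")
}

Because the trait is sealed, the compiler warns when a new event type is added without a corresponding case, which is the main benefit of this dispatch style.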

From the analysis above, the submission of a Spark Job is ultimately handled by the background thread named "dag-scheduler-event-loop", which then calls dagScheduler.handleJobSubmitted to complete the final submission of the Job. The next article will look at how a Job is divided into Stages.
