Advanced Spark: Stage Division


The question this article addresses:

I have recently been focusing on Spark's scheduling optimization, so this article takes a close look at one part of it: how Spark divides a job into Stages.



When an action is triggered on an RDD, SparkContext.runJob is called, which ultimately invokes DAGScheduler.handleJobSubmitted to submit the whole job. The DAGScheduler then divides the job into Stages according to the RDD's lineage and generates a TaskSet for each Stage; the TaskScheduler requests resources from the cluster, and the Tasks finally run inside Executor processes on the Worker nodes.
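To make this concrete, here is a minimal sketch of an application that exercises exactly this path (the object name StageDemo and the word-count logic are illustrative, not from the original post): collect() is the action that calls SparkContext.runJob, and the reduceByKey shuffle splits the job into two Stages.

import org.apache.spark.{SparkConf, SparkContext}

// Minimal sketch: one action and one shuffle give one job with two Stages.
object StageDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("StageDemo").setMaster("local[2]"))

    val words  = sc.parallelize(Seq("a", "b", "a", "c"))
    val pairs  = words.map(w => (w, 1))        // narrow dependency: stays in the same Stage
    val counts = pairs.reduceByKey(_ + _)      // wide dependency: forces a Stage boundary

    // collect() is the action: it calls SparkContext.runJob, which eventually
    // reaches DAGScheduler.handleJobSubmitted with `counts` as the final RDD.
    println(counts.collect().mkString(", "))
    sc.stop()
  }
}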

Let us first look at how the Stage division works. The figure below shows the Stages generated for a sample Spark application. The division follows the RDD dependency graph: whenever a wide dependency (a ShuffleDependency) is encountered, the two RDDs on either side of it are placed in different Stages.

[Figure: an example RDD dependency graph (including RDDs B, F, and G) and the resulting Stage division]

As the figure shows, the dependency between RDD F and RDD G is wide, so RDD F and RDD G are placed in different Stages, whereas the dependency between RDD B and RDD G is narrow, so RDD B and RDD G end up in the same Stage. Applying this rule while walking the lineage backwards from the final RDD partitions the whole RDD graph into Stages.
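You can observe these dependency types directly from application code. Continuing the hypothetical StageDemo sketch above (only the SparkContext `sc` is assumed from it), the following checks mirror the pattern match that, as we will see, getParentStages performs:

import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency}

// Assuming the SparkContext `sc` from the StageDemo sketch above:
val pairs  = sc.parallelize(Seq("a", "b", "a", "c")).map(w => (w, 1))
val counts = pairs.reduceByKey(_ + _)

// Classify an RDD's dependency the way the DAGScheduler does.
def kind(dep: Dependency[_]): String = dep match {
  case _: ShuffleDependency[_, _, _] => "wide (Stage boundary)"
  case _: OneToOneDependency[_]      => "narrow (same Stage)"
  case other                         => other.getClass.getSimpleName
}

println(kind(pairs.dependencies.head))   // narrow: map is one-to-one
println(kind(counts.dependencies.head))  // wide: reduceByKey shuffles

// counts.toDebugString also prints the lineage; each indentation level
// corresponds to a Stage boundary at a shuffle.
println(counts.toDebugString)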

// DAGScheduler.handleJobSubmitted
// The parameter finalRDD is the last RDD at the point the action was triggered.
private[scheduler] def handleJobSubmitted(jobId: Int,
    finalRDD: RDD[_],
    func: (TaskContext, Iterator[_]) => _,
    partitions: Array[Int],
    callSite: CallSite,
    listener: JobListener,
    properties: Properties) {
  // finalStage is the last Stage of the job
  var finalStage: ResultStage = null
  try {
    // New stage creation may throw an exception if, for example, jobs are run on a
    // HadoopRDD whose underlying HDFS files have been deleted.
    // Call newResultStage to create the final Stage.
    finalStage = newResultStage(finalRDD, partitions.length, jobId, callSite)
  } catch {
    case e: Exception =>
      logWarning("Creating new stage failed due to exception - job: " + jobId, e)
      listener.jobFailed(e)
      return
  }
  // If the final Stage was created successfully
  if (finalStage != null) {
    val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties)
    clearCacheLocs()
    logInfo("Got job %s (%s) with %d output partitions".format(
      job.jobId, callSite.shortForm, partitions.length))
    logInfo("Final stage: " + finalStage + "(" + finalStage.name + ")")
    logInfo("Parents of final stage: " + finalStage.parents)
    logInfo("Missing parents: " + getMissingParentStages(finalStage))
    val jobSubmissionTime = clock.getTimeMillis()
    jobIdToActiveJob(jobId) = job
    activeJobs += job
    finalStage.resultOfJob = Some(job)
    val stageIds = jobIdToStageIds(jobId).toArray
    val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo))
    // Post SparkListenerJobStart to the LiveListenerBus;
    // the listenerThread background thread handles the event.
    listenerBus.post(
      SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos, properties))
    // Submit finalStage; this also submits any of its ancestor stages
    // that have not been submitted yet.
    submitStage(finalStage)
  }
  // Check whether any waiting or failed Stages need to be resubmitted.
  submitWaitingStages()
}
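submitStage itself is not shown in this post, but its effect is worth sketching: a Stage can only run after all of its parent Stages have produced their shuffle output, so the submission order is effectively a topological order over the Stage DAG. The following is a hedged, self-contained sketch with a simplified SimpleStage type, not Spark's actual code (the real scheduler parks not-yet-runnable stages in a waiting list and resubmits them asynchronously as their parents finish):

// Hypothetical simplified stage type, for illustration only.
case class SimpleStage(id: Int, parents: Seq[SimpleStage], var submitted: Boolean = false)

def submit(stage: SimpleStage): Unit = {
  if (!stage.submitted) {
    stage.parents.foreach(submit)        // submit missing ancestors first
    stage.submitted = true               // then this stage itself can run
    println(s"running stage ${stage.id}")
  }
}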

Now focus on the newResultStage method:

/**
 * Create a ResultStage associated with the provided jobId.
 */
private def newResultStage(
    rdd: RDD[_],
    numTasks: Int,
    jobId: Int,
    callSite: CallSite): ResultStage = {
  // Get the parent Stages and an id for the new Stage
  val (parentStages: List[Stage], id: Int) = getParentStagesAndId(rdd, jobId)
  // Create the ResultStage
  val stage: ResultStage = new ResultStage(id, rdd, numTasks, parentStages, jobId, callSite)
  stageIdToStage(id) = stage
  updateJobIdStageIdMaps(jobId, stage)
  stage
}

Next, jump to the getParentStagesAndId method:

/**
 * Helper function to eliminate some code re-use when creating new stages.
 */
private def getParentStagesAndId(rdd: RDD[_], firstJobId: Int): (List[Stage], Int) = {
  // Generate the parent Stages from the ShuffleDependencies (wide dependencies)
  val parentStages = getParentStages(rdd, firstJobId)
  // Generate the Stage id
  val id = nextStageId.getAndIncrement()
  (parentStages, id)
}

Then jump to the getParentStages method:

/**
 * Get or create the list of parent stages for a given RDD.  The new Stages will be created with
 * the provided firstJobId.
 */
private def getParentStages(rdd: RDD[_], firstJobId: Int): List[Stage] = {
  val parents = new HashSet[Stage]
  val visited = new HashSet[RDD[_]]
  // We are manually maintaining a stack here to prevent StackOverflowError
  // caused by recursively visiting
  val waitingForVisit = new Stack[RDD[_]]
  // Depth-first traversal of the lineage with an explicit stack
  def visit(r: RDD[_]) {
    if (!visited(r)) {
      visited += r
      // Kind of ugly: need to register RDDs with the cache here since
      // we can't do it in its constructor because # of partitions is unknown
      for (dep <- r.dependencies) {
        dep match {
          // A ShuffleDependency marks a Stage boundary: get or create the parent Stage
          case shufDep: ShuffleDependency[_, _, _] =>
            parents += getShuffleMapStage(shufDep, firstJobId)
          case _ =>
            waitingForVisit.push(dep.rdd)
        }
      }
    }
  }
  waitingForVisit.push(rdd)
  while (waitingForVisit.nonEmpty) {
    visit(waitingForVisit.pop())
  }
  parents.toList
}
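To isolate the traversal logic from the scheduler plumbing, here is a stripped-down re-implementation of the same walk (a sketch with hypothetical types: Node stands in for RDD and the Boolean flag for "is this dependency a shuffle"; it is not Spark's code). It shows the three ingredients: an explicit stack instead of recursion, a visited set, and a cut at every shuffle dependency:

import scala.collection.mutable

// Hypothetical stand-in for an RDD: each dep is (parent, isShuffle).
case class Node(id: Int, deps: Seq[(Node, Boolean)])

def parentStageRoots(finalNode: Node): Set[Node] = {
  val roots   = mutable.HashSet[Node]()   // RDDs that head a parent Stage
  val visited = mutable.HashSet[Node]()
  val stack   = mutable.Stack[Node](finalNode)

  while (stack.nonEmpty) {
    val n = stack.pop()
    if (!visited(n)) {
      visited += n
      for ((parent, isShuffle) <- n.deps) {
        if (isShuffle) roots += parent    // shuffle: parent belongs to a new Stage
        else stack.push(parent)           // narrow: same Stage, keep walking
      }
    }
  }
  roots.toSet
}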

Finally, jump to the getShuffleMapStage method:

/**
 * Get or create a shuffle map stage for the given shuffle dependency's map side.
 */
private def getShuffleMapStage(
    shuffleDep: ShuffleDependency[_, _, _],
    firstJobId: Int): ShuffleMapStage = {
  shuffleToMapStage.get(shuffleDep.shuffleId) match {
    // If the stage already exists, return it as-is instead of creating a new one
    case Some(stage) => stage
    // Otherwise create it
    case None =>
      // We are going to register ancestor shuffle dependencies
      // First make sure the ancestor Stages this one depends on exist, creating them if not
      registerShuffleDependencies(shuffleDep, firstJobId)
      // Then register current shuffleDep
      // Create the Stage for the RDD of the current shuffleDep
      val stage = newOrUsedShuffleStage(shuffleDep, firstJobId)
      shuffleToMapStage(shuffleDep.shuffleId) = stage
      stage
  }
}
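The shuffleToMapStage lookup is what lets a ShuffleMapStage be shared: any later request for the same shuffleId gets the already-registered stage object back instead of a fresh one. Stripped of the Spark types, the pattern is plain get-or-create memoization (a hedged sketch, with a String standing in for ShuffleMapStage):

import scala.collection.mutable

val shuffleToStage = mutable.HashMap[Int, String]()   // shuffleId -> stage

def getOrCreateStage(shuffleId: Int): String =
  shuffleToStage.getOrElseUpdate(shuffleId, {
    println(s"creating stage for shuffle $shuffleId") // runs only on the first request
    s"ShuffleMapStage-for-shuffle-$shuffleId"
  })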