Spark Learning 63 - Source code: the creation of schedulerBackend and taskScheduler (1) - local


1. The schedulerBackend and taskScheduler are created while the SparkContext is being created

The createTaskScheduler method matches the deployment mode against the master setting, creates a TaskSchedulerImpl, and builds the corresponding SchedulerBackend.

// In SparkContext: this is where schedulerBackend, taskScheduler and dagScheduler are
// actually created and started. createTaskScheduler matches the deployment mode against
// the master setting, creates a TaskSchedulerImpl and the corresponding SchedulerBackend.
val (sched, ts) = SparkContext.createTaskScheduler(this, master, deployMode)
_schedulerBackend = sched   // the SchedulerBackend
_taskScheduler = ts         // the TaskScheduler

2. A different SchedulerBackend is created depending on the master

/**
 * Create a task scheduler based on a given master URL.
 * Return a 2-tuple of the scheduler backend and the task scheduler.
 *
 * The TaskScheduler is an important part of the SparkContext: it submits tasks and asks the
 * cluster manager to schedule them. It can be seen as the client side of task scheduling.
 *
 * createTaskScheduler matches the deployment mode against the master setting, creates a
 * TaskSchedulerImpl and builds the corresponding SchedulerBackend.
 */
private def createTaskScheduler(
    sc: SparkContext,
    master: String,
    deployMode: String): (SchedulerBackend, TaskScheduler) = {
  import SparkMasterRegex._

  // When running locally, don't try to re-execute tasks on failure.
  val MAX_LOCAL_TASK_FAILURES = 1

  // Different deployment modes produce different scheduler/backend pairs; in this series we
  // mainly follow the local and standalone cases.
  master match {
    case "local" =>  // single-threaded local mode
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, 1)
      scheduler.initialize(backend)
      (backend, scheduler)

    // local mode with several cores; `threads` is the number of cores
    case LOCAL_N_REGEX(threads) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*] estimates the number of cores on the machine; local[N] uses exactly N threads.
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      if (threadCount <= 0) {
        throw new SparkException(s"Asked to run locally with $threadCount threads")
      }
      val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)

    case LOCAL_N_FAILURES_REGEX(threads, maxFailures) =>
      def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
      // local[*, M] means the number of cores on the computer with M failures
      // local[N, M] means exactly N threads with M failures
      val threadCount = if (threads == "*") localCpuCount else threads.toInt
      val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true)
      val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount)
      scheduler.initialize(backend)
      (backend, scheduler)

    // A master URL like spark://192.168.10.173:7077. TaskSchedulerImpl keeps the scheduling
    // state; the backend is what actually ships tasks to the executors on the workers.
    case SPARK_REGEX(sparkUrl) =>
      val scheduler = new TaskSchedulerImpl(sc)
      val masterUrls = sparkUrl.split(",").map("spark://" + _)
      // Instantiate StandaloneSchedulerBackend (standalone mode)
      val backend = new StandaloneSchedulerBackend(scheduler, sc, masterUrls)
      // Initialize the TaskSchedulerImpl
      scheduler.initialize(backend)
      (backend, scheduler)

    // local-cluster mode; see http://blog.csdn.net/qq_21383435/article/details/78900834
    case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
      // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang.
      val memoryPerSlaveInt = memoryPerSlave.toInt
      if (sc.executorMemory > memoryPerSlaveInt) {
        throw new SparkException(
          "Asked to launch cluster with %d MB RAM / worker but requested %d MB/worker".format(
            memoryPerSlaveInt, sc.executorMemory))
      }
      val scheduler = new TaskSchedulerImpl(sc)
      val localCluster = new LocalSparkCluster(
        numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt, sc.conf)
      val masterUrls = localCluster.start()
      val backend = new StandaloneSchedulerBackend(scheduler, sc, masterUrls)
      scheduler.initialize(backend)
      backend.shutdownCallback = (backend: StandaloneSchedulerBackend) => {
        localCluster.stop()
      }
      (backend, scheduler)

    case masterUrl =>
      val cm = getClusterManager(masterUrl) match {
        case Some(clusterMgr) => clusterMgr
        case None => throw new SparkException("Could not parse Master URL: '" + master + "'")
      }
      try {
        val scheduler = cm.createTaskScheduler(sc, masterUrl)
        val backend = cm.createSchedulerBackend(sc, masterUrl, scheduler)
        cm.initialize(scheduler, backend)
        (backend, scheduler)
      } catch {
        case se: SparkException => throw se
        case NonFatal(e) =>
          throw new SparkException("External scheduler cannot be instantiated", e)
      }
  }
}
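To make the match cases concrete, here are a few example master strings and the branch each one would hit. This is a hedged illustration only; the exact patterns live in SparkMasterRegex.

import org.apache.spark.SparkConf

// Illustrative master strings and the createTaskScheduler branch they select:
new SparkConf().setMaster("local")                          // case "local": 1 thread, 1 task failure allowed
new SparkConf().setMaster("local[4]")                       // LOCAL_N_REGEX: 4 threads
new SparkConf().setMaster("local[*]")                       // LOCAL_N_REGEX: one thread per available core
new SparkConf().setMaster("local[4,3]")                     // LOCAL_N_FAILURES_REGEX: 4 threads, up to 3 failures
new SparkConf().setMaster("spark://host1:7077,host2:7077")  // SPARK_REGEX: standalone cluster
new SparkConf().setMaster("local-cluster[2,1,1024]")        // LOCAL_CLUSTER_REGEX: 2 workers, 1 core, 1024 MB each
new SparkConf().setMaster("yarn")                           // default case: external cluster manager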

3. Start with plain "local" mode, which specifies no thread count and uses only a single core

User code

// Run in local mode, convenient for testing
val sparkConf = new SparkConf().setMaster("local")
// Create the SparkContext
val sc = new SparkContext(sparkConf)
// Different deployment modes produce different scheduler/backend pairs
master match {
  case "local" =>  // single-threaded local mode
    val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true)
    val backend = new LocalSchedulerBackend(sc.getConf, scheduler, 1)
    scheduler.initialize(backend)
    (backend, scheduler)

For the details of TaskSchedulerImpl, see:
Here we focus on LocalSchedulerBackend, starting with what the class extends.

private[spark] class LocalSchedulerBackend(
    conf: SparkConf,
    scheduler: TaskSchedulerImpl,
    val totalCores: Int)
  extends SchedulerBackend with ExecutorBackend with Logging {

When reading source code it helps to start with the parent types, so look at SchedulerBackend first.

/**
 * A backend interface for scheduling systems that allows plugging in different ones under
 * TaskSchedulerImpl. We assume a Mesos-like model where the application gets resource offers as
 * machines become available and can launch tasks on them.
 *
 * SchedulerBackend sits below the TaskScheduler and adapts it to different resource managers
 * (standalone, Mesos, YARN, ...). Its two jobs are acquiring resources and launching/managing
 * tasks; the core interaction is resource offers via reviveOffers.
 */
private[spark] trait SchedulerBackend {
  private val appId = "spark-application-" + System.currentTimeMillis

  def start(): Unit
  def stop(): Unit

  // Key method: the SchedulerBackend hands its currently available resources to the
  // TaskScheduler, which applies its scheduling policy to the queued tasks and returns a batch
  // of runnable task descriptions. The SchedulerBackend then launches those tasks, i.e. pushes
  // them onto executors, whose thread pools run each task's run().
  def reviveOffers(): Unit

  def defaultParallelism(): Int

  /**
   * Requests that an executor kills a running task.
   *
   * @param taskId Id of the task.
   * @param executorId Id of the executor the task is running on.
   * @param interruptThread Whether the executor should interrupt the task thread.
   * @param reason The reason for the task kill.
   */
  def killTask(
      taskId: Long,
      executorId: String,
      interruptThread: Boolean,
      reason: String): Unit =
    throw new UnsupportedOperationException

  def isReady(): Boolean = true

  /**
   * Get an application ID associated with the job.
   *
   * @return An application ID
   */
  def applicationId(): String = appId

  /**
   * Get the attempt ID for this run, if the cluster manager supports multiple
   * attempts. Applications run in client mode will not have attempt IDs.
   *
   * @return The application attempt id, if available.
   */
  def applicationAttemptId(): Option[String] = None

  /**
   * Get the URLs for the driver logs. These URLs are used to display the links in the UI
   * Executors tab for the driver.
   *
   * @return Map containing the log names and their respective URLs
   */
  def getDriverLogUrls: Option[Map[String, String]] = None
}
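To see how small the contract is, here is a minimal sketch of a do-nothing SchedulerBackend. It is illustrative only, not a backend Spark ships; a real backend would hand WorkerOffers to the TaskSchedulerImpl inside reviveOffers.

package org.apache.spark.scheduler  // the trait is private[spark], so a sketch has to live in this package

// Illustrative only: the minimal members a SchedulerBackend must provide.
private[spark] class NoopSchedulerBackend(cores: Int) extends SchedulerBackend {
  override def start(): Unit = println("backend started")
  override def stop(): Unit = println("backend stopped")
  // A real backend would build WorkerOffers from its free resources, pass them to
  // TaskSchedulerImpl.resourceOffers, and launch the returned task descriptions.
  override def reviveOffers(): Unit = ()
  override def defaultParallelism(): Int = cores
}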

Next, ExecutorBackend:

/**
 * A pluggable interface used by the Executor to send updates to the cluster scheduler.
 */
private[spark] trait ExecutorBackend {
  def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer): Unit
}
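ExecutorBackend is even smaller: one callback through which an executor reports task state back to its scheduler. Below is a minimal sketch of an implementation that just logs every update; this is an illustration only, the real implementation for local mode is LocalSchedulerBackend itself.

package org.apache.spark.executor  // ExecutorBackend is private[spark]

import java.nio.ByteBuffer
import org.apache.spark.TaskState.TaskState

// Illustrative only: an ExecutorBackend that prints every status update it receives.
private[spark] class LoggingExecutorBackend extends ExecutorBackend {
  override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer): Unit =
    println(s"task $taskId is now $state (${data.remaining()} bytes of payload)")
}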

LocalSchedulerBackend then sets up its appId and listenerBus:

private val appId = "local-" + System.currentTimeMillis
private var localEndpoint: RpcEndpointRef = null
private val userClassPath = getUserClasspath(conf)
private val listenerBus = scheduler.sc.listenerBus
private val launcherBackend = new LauncherBackend() {
  override def onStopRequest(): Unit = stop(SparkAppHandle.State.KILLED)
}

It also connects the LauncherBackend:

// LauncherBackend talks back to the process that started this application through the
// spark-launcher library (SparkLauncher / SparkAppHandle), so that process can track the
// app's state and stop it; if the app was not started through the launcher, there is
// nothing to connect to.
launcherBackend.connect()

Then the start method:

override def start() {
  val rpcEnv = SparkEnv.get.rpcEnv
  // Create a LocalEndpoint
  val executorEndpoint = new LocalEndpoint(rpcEnv, userClassPath, scheduler, this, totalCores)
  // Register the RpcEndpoint under a name and get back its RpcEndpointRef;
  // the RpcEnv plays a role similar to a listener bus here
  localEndpoint = rpcEnv.setupEndpoint("LocalSchedulerBackendEndpoint", executorEndpoint)
  // Post a SparkListenerExecutorAdded event
  listenerBus.post(SparkListenerExecutorAdded(
    System.currentTimeMillis,
    executorEndpoint.localExecutorId,
    new ExecutorInfo(executorEndpoint.localExecutorHostname, totalCores, Map.empty)))
  // Tell the launcherBackend which application it is serving
  launcherBackend.setAppId(appId)
  // Mark the application as running
  launcherBackend.setState(SparkAppHandle.State.RUNNING)
}
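The SparkListenerExecutorAdded event posted above can be observed from user code. A minimal sketch, assuming Spark 2.x; the class and object names are mine, and the listener is registered through spark.extraListeners so it is attached before LocalSchedulerBackend.start() posts the event during SparkContext construction.

import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.{SparkConf, SparkContext}

// Needs a zero-arg constructor so spark.extraListeners can instantiate it.
class ExecutorAddedLogger extends SparkListener {
  override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit =
    println(s"executor added: id=${event.executorId}, cores=${event.executorInfo.totalCores}")
}

object ListenerDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("executor-added-demo")
      .set("spark.extraListeners", classOf[ExecutorAddedLogger].getName)
    val sc = new SparkContext(conf)   // in local mode the single "executor" is the driver itself
    sc.parallelize(1 to 10).count()
    sc.stop()
  }
}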

Look at this part:

// Create a LocalEndpoint
val executorEndpoint = new LocalEndpoint(rpcEnv, userClassPath, scheduler, this, totalCores)

This constructs a LocalEndpoint:

/**
 * Calls to [[LocalSchedulerBackend]] are all serialized through LocalEndpoint. Using an
 * RpcEndpoint makes the calls on [[LocalSchedulerBackend]] asynchronous, which is necessary
 * to prevent deadlock between [[LocalSchedulerBackend]] and the [[TaskSchedulerImpl]].
 *
 * In Spark 1.5 this was LocalActor; it is now LocalEndpoint.
 * Constructing it mainly builds the local Executor.
 */
private[spark] class LocalEndpoint(
    override val rpcEnv: RpcEnv,
    userClassPath: Seq[URL],
    scheduler: TaskSchedulerImpl,
    executorBackend: LocalSchedulerBackend,
    private val totalCores: Int)
  extends ThreadSafeRpcEndpoint with Logging {

  private var freeCores = totalCores

  val localExecutorId = SparkContext.DRIVER_IDENTIFIER
  val localExecutorHostname = "localhost"

  // Create the executor locally
  private val executor = new Executor(
    localExecutorId, localExecutorHostname, SparkEnv.get, userClassPath, isLocal = true)

  // Handle messages sent from an RpcEndpointRef via send or reply
  override def receive: PartialFunction[Any, Unit] = {
    case ReviveOffers =>
      // offer resources and run tasks
      reviveOffers()

    case StatusUpdate(taskId, state, serializedData) =>
      scheduler.statusUpdate(taskId, state, serializedData)
      if (TaskState.isFinished(state)) {
        // give back the CPUs the finished task was using (CPUs allocated per task)
        freeCores += scheduler.CPUS_PER_TASK
        // offer resources and run tasks
        reviveOffers()
      }

    // kill a task
    case KillTask(taskId, interruptThread, reason) =>
      executor.killTask(taskId, interruptThread, reason)
  }

  override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
    case StopExecutor =>
      executor.stop()
      context.reply(true)
  }

  /**
   * Steps:
   *   1. Build a WorkerOffer from the executor id, hostname and freeCores (idle CPU cores).
   *   2. Call TaskSchedulerImpl.resourceOffers to assign resources.
   *   3. Call Executor.launchTask to run each assigned task.
   */
  def reviveOffers() {
    // A WorkerOffer describes the free resources available on an executor.
    val offers = IndexedSeq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores))
    for (task <- scheduler.resourceOffers(offers).flatten) {  // assign resources
      freeCores -= scheduler.CPUS_PER_TASK  // fewer free cores on the executor
      executor.launchTask(executorBackend, task)  // run the task
    }
  }
}

First, look at what LocalEndpoint does: it creates an executor locally.

// Create the executor locally
private val executor = new Executor(
  localExecutorId, localExecutorHostname, SparkEnv.get, userClassPath, isLocal = true)

The Executor class first logs a line:

// e.g. 17/12/05 11:56:51 INFO Executor: Starting executor ID driver on host localhost
logInfo(s"Starting executor ID $executorId on host $executorHostname")

Then it performs some checks on the hostname and on whether it is running locally:

// No ip or host:port - just hostname
Utils.checkHost(executorHostname, "Expected executed slave to be a hostname")
// must not have port specified.
assert (0 == Utils.parseHostPort(executorHostname)._2)

// Make sure the local hostname we report matches the cluster scheduler's name for this host
Utils.setCustomHostname(executorHostname)

if (!isLocal) {
  // Setup an uncaught exception handler for non-local mode.
  // Make any thread terminations due to uncaught exceptions kill the entire
  // executor process to avoid surprising stalls.
  Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler)
}

Now the important part: the executor creates the thread pool that will run its tasks.

// Start worker thread pool
private val threadPool = {
  val threadFactory = new ThreadFactoryBuilder()
    .setDaemon(true)
    .setNameFormat("Executor task launch worker-%d")
    .setThreadFactory(new ThreadFactory {
      override def newThread(r: Runnable): Thread =
        // Use UninterruptibleThread to run tasks so that we can allow running codes without being
        // interrupted by `Thread.interrupt()`. Some issues, such as KAFKA-1894, HADOOP-10622,
        // will hang forever if some methods are interrupted.
        new UninterruptibleThread(r, "unused") // thread name will be set by ThreadFactoryBuilder
    })
    .build()
  Executors.newCachedThreadPool(threadFactory).asInstanceOf[ThreadPoolExecutor]
}
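For readers who do not know Guava's ThreadFactoryBuilder, here is a minimal sketch of the same pattern using only java.util.concurrent: a cached thread pool whose daemon threads get a readable name. This is an illustration, not the Spark code, and it leaves UninterruptibleThread out.

import java.util.concurrent.{Executors, ThreadFactory, ThreadPoolExecutor}
import java.util.concurrent.atomic.AtomicInteger

// Cached pool: threads are created on demand, reused while busy, and reclaimed when idle.
val taskLaunchPool: ThreadPoolExecutor = {
  val counter = new AtomicInteger(0)
  val factory = new ThreadFactory {
    override def newThread(r: Runnable): Thread = {
      val t = new Thread(r, s"task-launch-worker-${counter.getAndIncrement()}")
      t.setDaemon(true)  // daemon threads do not keep the JVM alive on shutdown
      t
    }
  }
  Executors.newCachedThreadPool(factory).asInstanceOf[ThreadPoolExecutor]
}

// Usage: submit a Runnable, the same way Executor.launchTask submits a TaskRunner.
taskLaunchPool.execute(new Runnable {
  override def run(): Unit = println(s"running on ${Thread.currentThread().getName}")
})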

Then the metrics source:

 private val executorSource = new ExecutorSource(threadPool, executorId)

Limits on the size of task results:

// Max size of direct result. If task result is bigger than this, we use the block manager
// to send the result back.
private val maxDirectResultSize = Math.min(
  conf.getSizeAsBytes("spark.task.maxDirectResultSize", 1L << 20),
  RpcUtils.maxMessageSizeBytes(conf))

// Limit of bytes for total size of results (default is 1GB)
private val maxResultSize = Utils.getMaxResultSize(conf)
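These two limits drive the decision made later in TaskRunner.run when a result is sent back to the driver. A minimal sketch of that three-way decision follows; the function and type names are mine, not Spark API.

// Illustrative only: mirrors the three branches taken in TaskRunner.run.
sealed trait ResultRoute
case object DropResult extends ResultRoute        // too big even for the BlockManager path
case object ViaBlockManager extends ResultRoute   // stored in the BlockManager, driver fetches it
case object DirectToDriver extends ResultRoute    // small enough to ship inside the status update

def routeResult(resultSize: Long, maxResultSize: Long, maxDirectResultSize: Long): ResultRoute =
  if (maxResultSize > 0 && resultSize > maxResultSize) DropResult
  else if (resultSize > maxDirectResultSize) ViaBlockManager
  else DirectToDriver

// Example with the defaults: maxResultSize = 1 GB, maxDirectResultSize = 1 MB.
routeResult(resultSize = 512L * 1024, maxResultSize = 1L << 30, maxDirectResultSize = 1L << 20)
// => DirectToDriver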

That was the executor's setup phase. With everything in place, execution starts at executor.launchTask(executorBackend, task):

/**
 * Calling Executor.launchTask marks the start of task execution. Steps:
 *
 * 1. Create a TaskRunner and put it, keyed by taskId (taskId, taskName and serializedTask are
 *    all carried by the TaskDescription), into runningTasks, a ConcurrentHashMap[Long, TaskRunner].
 * 2. TaskRunner implements Runnable (in Scala, it extends the Runnable trait), so it is finally
 *    handed to the thread pool for execution.
 */
def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
  // create a TaskRunner to run the task
  val tr = new TaskRunner(context, taskDescription)
  // add the task to the running-task map
  runningTasks.put(taskDescription.taskId, tr)
  threadPool.execute(tr)
}

The interesting part is:

// create a TaskRunner to run the task
val tr = new TaskRunner(context, taskDescription)

TaskRunner's main method is run:

/**
 * This is the main method by which the Executor runs a task. When the task finishes, the
 * Executor notifies the driver by sending a StatusUpdate message with state TaskState.FINISHED.
 *
 * While running, the computed result is wrapped in org.apache.spark.scheduler.DirectTaskResult.
 * When it is sent back to the driver, the strategy depends on the size of the serialized result:
 * (1) if it is larger than maxResultSize (spark.driver.maxResultSize, 1 GB by default), the
 *     result is dropped and only an IndirectTaskResult carrying its size is returned;
 * (2) if it is larger than maxDirectResultSize (the smaller of spark.task.maxDirectResultSize
 *     and the RPC max message size) but within maxResultSize, it is stored in the BlockManager
 *     under its taskId and an IndirectTaskResult referencing that block is returned;
 * (3) otherwise it is small enough to be sent straight back to the driver in the status update.
 */
override def run(): Unit = {
  threadId = Thread.currentThread.getId
  Thread.currentThread.setName(threadName)
  // managed bean for the JVM's thread system
  val threadMXBean = ManagementFactory.getThreadMXBean
  // memory manager for this task => manages the memory allocated to a single task
  val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)
  // record the deserialization start time
  val deserializeStartTime = System.currentTimeMillis()
  // CPU time, if the JVM supports per-thread CPU time measurement
  val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
    threadMXBean.getCurrentThreadCpuTime
  } else 0L
  // class loader used to load the task's classes
  Thread.currentThread.setContextClassLoader(replClassLoader)
  // closure serializer
  val ser = env.closureSerializer.newInstance()
  logInfo(s"Running $taskName (TID $taskId)")
  // Report the task as RUNNING: statusUpdate sends the state to the driver
  // (in yarn-client mode this goes through CoarseGrainedExecutorBackend.statusUpdate)
  execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
  // record run time and GC information
  var taskStart: Long = 0
  var taskStartCpu: Long = 0
  startGCTime = computeTotalGcTime()

  try {
    // Must be set before updateDependencies() is called, in case fetching dependencies
    // requires access to properties contained within (e.g. for access control).
    Executor.taskDeserializationProps.set(taskDescription.properties)

    // download any dependencies the task is missing
    updateDependencies(taskDescription.addedFiles, taskDescription.addedJars)
    // deserialize the task
    task = ser.deserialize[Task[Any]](
      taskDescription.serializedTask, Thread.currentThread.getContextClassLoader)
    task.localProperties = taskDescription.properties
    // attach the task's memory manager
    task.setTaskMemoryManager(taskMemoryManager)

    // If this task has been killed before we deserialized it, let's quit now. Otherwise,
    // continue executing the task. (For example: the job was submitted and then cancelled
    // with Ctrl+C before this task ever started running.)
    val killReason = reasonIfKilled
    if (killReason.isDefined) {
      // Throw an exception rather than returning, because returning within a try{} block
      // causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
      // exception will be caught by the catch block, leading to an incorrect ExceptionFailure
      // for the task.
      throw new TaskKilledException(killReason.get)
    }

    logDebug("Task " + taskId + "'s epoch is " + task.epoch)
    env.mapOutputTracker.updateEpoch(task.epoch)

    // Run the actual task and measure its runtime.
    taskStart = System.currentTimeMillis()
    taskStartCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime
    } else 0L
    var threwException = true
    val value = try {
      // call Task.run: this is where the task actually executes
      val res = task.run(
        taskAttemptId = taskId,
        attemptNumber = taskDescription.attemptNumber,
        metricsSystem = env.metricsSystem)
      threwException = false
      res
    } finally {
      // release all allocated memory and block locks, and check for leaks
      val releasedLocks = env.blockManager.releaseAllLocksForTask(taskId)
      val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()

      if (freedMemory > 0 && !threwException) {
        val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"
        if (conf.getBoolean("spark.unsafe.exceptionOnMemoryLeak", false)) {
          throw new SparkException(errMsg)
        } else {
          logWarning(errMsg)
        }
      }

      if (releasedLocks.nonEmpty && !threwException) {
        val errMsg =
          s"${releasedLocks.size} block locks were not released by TID = $taskId:\n" +
            releasedLocks.mkString("[", ", ", "]")
        if (conf.getBoolean("spark.storage.exceptionOnPinLeak", false)) {
          throw new SparkException(errMsg)
        } else {
          logInfo(errMsg)
        }
      }
    }
    task.context.fetchFailed.foreach { fetchFailure =>
      // uh-oh.  it appears the user code has caught the fetch-failure without throwing any
      // other exceptions.  Its *possible* this is what the user meant to do (though highly
      // unlikely).  So we will log an error and keep going.
      logError(s"TID ${taskId} completed successfully though internally it encountered " +
        s"unrecoverable fetch failures!  Most likely this means user code is incorrectly " +
        s"swallowing Spark's internal ${classOf[FetchFailedException]}", fetchFailure)
    }
    // record the task finish time
    val taskFinish = System.currentTimeMillis()
    val taskFinishCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime
    } else 0L

    // If the task has been killed, let's fail it.
    task.context.killTaskIfInterrupted()

    // otherwise, serialize the task's result
    val resultSer = env.serializer.newInstance()
    val beforeSerialization = System.currentTimeMillis()
    val valueBytes = resultSer.serialize(value)
    val afterSerialization = System.currentTimeMillis()

    // record the related metrics
    // Deserialization happens in two parts: first, we deserialize a Task object, which
    // includes the Partition. Second, Task.run() deserializes the RDD and function to be run.
    task.metrics.setExecutorDeserializeTime(
      (taskStart - deserializeStartTime) + task.executorDeserializeTime)
    task.metrics.setExecutorDeserializeCpuTime(
      (taskStartCpu - deserializeStartCpuTime) + task.executorDeserializeCpuTime)
    // We need to subtract Task.run()'s deserialization time to avoid double-counting
    task.metrics.setExecutorRunTime((taskFinish - taskStart) - task.executorDeserializeTime)
    task.metrics.setExecutorCpuTime(
      (taskFinishCpu - taskStartCpu) - task.executorDeserializeCpuTime)
    task.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime)
    task.metrics.setResultSerializationTime(afterSerialization - beforeSerialization)

    // Note: accumulator updates must be collected after TaskMetrics is updated
    val accumUpdates = task.collectAccumulatorUpdates()
    // build the DirectTaskResult that could go straight back to the driver, and serialize it
    // TODO: do not serialize value twice
    val directResult = new DirectTaskResult(valueBytes, accumUpdates)
    val serializedDirectResult = ser.serialize(directResult)
    val resultSize = serializedDirectResult.limit

    // directSend = sending directly back to the driver
    val serializedResult: ByteBuffer = {
      // decide how to route the result based on its size
      if (maxResultSize > 0 && resultSize > maxResultSize) {
        // larger than spark.driver.maxResultSize (1 GB by default): drop the result
        logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
          s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
          s"dropping it.")
        ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
      } else if (resultSize > maxDirectResultSize) {
        // larger than the direct-send threshold: store it in the BlockManager and let the
        // driver fetch it from there
        val blockId = TaskResultBlockId(taskId)
        env.blockManager.putBytes(
          blockId,
          new ChunkedByteBuffer(serializedDirectResult.duplicate()),
          StorageLevel.MEMORY_AND_DISK_SER)
        logInfo(
          s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
        // return an IndirectTaskResult pointing at the block
        ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
      } else {
        // small enough: send the result straight back to the driver
        logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")
        serializedDirectResult
      }
    }

    setTaskFinishedAndClearInterruptStatus()
    // mark the task FINISHED and notify the driver
    execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
  } catch {
    case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) =>
      val reason = task.context.fetchFailed.get.toTaskFailedReason
      if (!t.isInstanceOf[FetchFailedException]) {
        // there was a fetch failure in the task, but some user code wrapped that exception
        // and threw something else.  Regardless, we treat it as a fetch failure.
        val fetchFailedCls = classOf[FetchFailedException].getName
        logWarning(s"TID ${taskId} encountered a ${fetchFailedCls} and " +
          s"failed, but the ${fetchFailedCls} was hidden by another " +
          s"exception.  Spark is handling this like a fetch failure and ignoring the " +
          s"other exception: $t")
      }
      setTaskFinishedAndClearInterruptStatus()
      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case t: TaskKilledException =>
      logInfo(s"Executor killed $taskName (TID $taskId), reason: ${t.reason}")
      setTaskFinishedAndClearInterruptStatus()
      execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled(t.reason)))

    case _: InterruptedException | NonFatal(_) if
        task != null && task.reasonIfKilled.isDefined =>
      val killReason = task.reasonIfKilled.getOrElse("unknown reason")
      logInfo(s"Executor interrupted and killed $taskName (TID $taskId), reason: $killReason")
      setTaskFinishedAndClearInterruptStatus()
      execBackend.statusUpdate(
        taskId, TaskState.KILLED, ser.serialize(TaskKilled(killReason)))

    case CausedBy(cDE: CommitDeniedException) =>
      val reason = cDE.toTaskFailedReason
      setTaskFinishedAndClearInterruptStatus()
      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case t: Throwable =>
      // Attempt to exit cleanly by informing the driver of our failure.
      // If anything goes wrong (or this was a fatal exception), we will delegate to
      // the default uncaught exception handler, which will terminate the Executor.
      logError(s"Exception in $taskName (TID $taskId)", t)

      // Collect latest accumulator values to report back to the driver
      val accums: Seq[AccumulatorV2[_, _]] =
        if (task != null) {
          task.metrics.setExecutorRunTime(System.currentTimeMillis() - taskStart)
          task.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime)
          task.collectAccumulatorUpdates(taskFailed = true)
        } else {
          Seq.empty
        }

      val accUpdates = accums.map(acc => acc.toInfo(Some(acc.value), None))

      val serializedTaskEndReason = {
        try {
          ser.serialize(new ExceptionFailure(t, accUpdates).withAccums(accums))
        } catch {
          case _: NotSerializableException =>
            // t is not serializable so just send the stacktrace
            ser.serialize(new ExceptionFailure(t, accUpdates, false).withAccums(accums))
        }
      }
      setTaskFinishedAndClearInterruptStatus()
      execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskEndReason)

      // Don't forcibly exit unless the exception was inherently fatal, to avoid
      // stopping other tasks unnecessarily.
      if (Utils.isFatalError(t)) {
        uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), t)
      }
  } finally {
    // remove the task from the running-task map
    runningTasks.remove(taskId)
  }
}

One line in this method deserves attention:

// memory manager for this task => manages the memory allocated to a single task
val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)

TaskMemoryManager is implemented in Java. It is mainly about memory management and works with pages, which takes some operating-system background to follow; it will be covered in a later post.
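To give a feel for the "pages" mentioned above without diving into TaskMemoryManager itself, here is a minimal sketch of the idea: an address is split into a page number and an offset within the page and packed into a single long. The 13/51 bit split below mirrors Spark's layout as far as I know, but treat the exact numbers as illustrative.

// Illustrative only: encode/decode a (pageNumber, offsetInPage) pair in one Long,
// the way a task memory manager can hand out "addresses" into its own page table.
object PageAddress {
  val PageNumberBits = 13                      // up to 8192 pages per task
  val OffsetBits     = 64 - PageNumberBits     // 51 bits of offset within a page
  val MaxOffset      = (1L << OffsetBits) - 1

  def encode(pageNumber: Int, offsetInPage: Long): Long = {
    require(pageNumber >= 0 && pageNumber < (1 << PageNumberBits), "page number out of range")
    require(offsetInPage >= 0 && offsetInPage <= MaxOffset, "offset out of range")
    (pageNumber.toLong << OffsetBits) | offsetInPage
  }

  def pageNumber(address: Long): Int = (address >>> OffsetBits).toInt
  def offset(address: Long): Long    = address & MaxOffset
}

// Example: page 3, byte 1024 inside that page.
val addr = PageAddress.encode(3, 1024L)
assert(PageAddress.pageNumber(addr) == 3 && PageAddress.offset(addr) == 1024L)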

Then the task is run:

// call Task.run: this is where the task actually executes
val res = task.run(
  taskAttemptId = taskId,
  attemptNumber = taskDescription.attemptNumber,
  metricsSystem = env.metricsSystem)

Step into it:

/**
 * Called by [[org.apache.spark.executor.Executor]] to run this task.
 *
 * @param taskAttemptId an identifier for this task attempt that is unique within a SparkContext.
 * @param attemptNumber how many times this task has been attempted (0 for the first attempt)
 * @return the result of the task along with updates of Accumulators.
 *
 * The logic is straightforward:
 *   1. Create a task context, a TaskContextImpl, holding the stageId of the task's stage, the
 *      partitionId of the task's data partition, the taskAttemptId, the attemptNumber, the
 *      taskMemoryManager, the local properties, the metricsSystem and the task metrics.
 *   2. Store the context in TaskContext's thread-local taskContext variable.
 *   3. Remember the current thread as the task thread.
 *   4. If the task has already been asked to die, call kill() without interrupting the thread.
 *   5. Call runTask(context) to execute the task.
 *   6. Finally, mark the task as completed, release the memory this thread used for unrolling
 *      blocks, and clear the thread-local task context.
 */
final def run(
    taskAttemptId: Long,
    attemptNumber: Int,
    metricsSystem: MetricsSystem): T = {
  SparkEnv.get.blockManager.registerTask(taskAttemptId)
  // create the task context, a TaskContextImpl
  context = new TaskContextImpl(
    stageId,
    partitionId,
    taskAttemptId,
    attemptNumber,
    taskMemoryManager,
    localProperties,
    metricsSystem,
    metrics)
  // store the context in TaskContext's taskContext variable,
  // which is a ThreadLocal[TaskContext]
  TaskContext.setTaskContext(context)
  // the task thread is the current thread
  taskThread = Thread.currentThread()

  if (_reasonIfKilled != null) {
    // the task was already asked to die: kill it without interrupting the thread
    kill(interruptThread = false, _reasonIfKilled)
  }

  // CallerContext records who is doing the work (app/job/stage/task ids) so that, for example,
  // HDFS audit logs can attribute the access to this task
  new CallerContext(
    "TASK",
    SparkEnv.get.conf.get(APP_CALLER_CONTEXT),
    appId,
    appAttemptId,
    jobId,
    Option(stageId),
    Option(stageAttemptId),
    Option(taskAttemptId),
    Option(attemptNumber)).setCurrentContext()

  try {
    // run the task itself; runTask is implemented by the concrete Task subclasses
    runTask(context)
  } catch {
    case e: Throwable =>
      // Catch all errors; run task failure callbacks, and rethrow the exception.
      try {
        context.markTaskFailed(e)
      } catch {
        case t: Throwable =>
          e.addSuppressed(t)
      }
      // mark the task as completed (with the error)
      context.markTaskCompleted(Some(e))
      throw e
  } finally {
    try {
      // Call the task completion callbacks. If "markTaskCompleted" is called twice, the second
      // one is no-op.
      context.markTaskCompleted(None)
    } finally {
      try {
        Utils.tryLogNonFatalError {
          // Release memory used by this thread for unrolling blocks
          SparkEnv.get.blockManager.memoryStore.releaseUnrollMemoryForThisTask(MemoryMode.ON_HEAP)
          SparkEnv.get.blockManager.memoryStore.releaseUnrollMemoryForThisTask(
            MemoryMode.OFF_HEAP)
          // Notify any tasks waiting for execution memory to be freed to wake up and try to
          // acquire memory again. This makes impossible the scenario where a task sleeps forever
          // because there are no other tasks left to notify it. Since this is safe to do but may
          // not be strictly necessary, we should revisit whether we can remove this in the
          // future.
          val memoryManager = SparkEnv.get.memoryManager
          memoryManager.synchronized { memoryManager.notifyAll() }
        }
      } finally {
        // Though we unset the ThreadLocal here, the context member variable itself is still
        // queried directly in the TaskRunner to check for FetchFailedExceptions.
        // clear the thread-local TaskContext
        TaskContext.unset()
      }
    }
  }
}
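run() is a template method: the bookkeeping above is shared, while the actual work happens in runTask(context), which each concrete task type overrides (in Spark these are ShuffleMapTask and ResultTask). A minimal sketch of the pattern, with illustrative names only:

// Illustrative only: the shape of the template-method pattern used by Task.
abstract class MiniTask[T] {
  // shared bookkeeping lives in run(); the real work is delegated to runTask()
  final def run(taskAttemptId: Long): T = {
    println(s"setting up context for attempt $taskAttemptId")
    try runTask()
    finally println(s"cleaning up after attempt $taskAttemptId")
  }
  protected def runTask(): T
}

// A "result"-style task computes a value for the driver...
class MiniResultTask(data: Seq[Int]) extends MiniTask[Int] {
  protected def runTask(): Int = data.sum
}

// ...while a "shuffle map"-style task would write shuffle output and return its status.
class MiniShuffleMapTask(data: Seq[Int]) extends MiniTask[String] {
  protected def runTask(): String = { /* write shuffle files here */ "MapStatus(localhost)" }
}

new MiniResultTask(Seq(1, 2, 3)).run(taskAttemptId = 0)  // => 6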