Spark2.2 任务调度机制schedule()源码剖析

来源:互联网 发布:教打字的软件 编辑:程序博客网 时间:2024/05/20 02:30

schedule() 图解

这里写图片描述

schedule() 代码剖析

schedule()

  /**
   * Schedules the currently available resources among waiting drivers and applications.
   *
   * Per the original author's note, this is invoked whenever a new application is submitted
   * or resource availability changes. Nothing happens unless the master state is ALIVE.
   *
   * Drivers are placed first: for every waiting driver the alive workers (in a randomly
   * shuffled order) are visited round-robin, starting where the previous driver left off,
   * until one has enough free memory and cores or every alive worker has been tried once.
   * Afterwards executors are started via `startExecutorsOnWorkers()`.
   */
  private def schedule(): Unit = {
    // Only an ALIVE master schedules anything; otherwise this method is a no-op.
    if (state == RecoveryState.ALIVE) {
      // Shuffle the alive workers so drivers are not always packed onto the same ones.
      val aliveWorkers = Random.shuffle(workers.toSeq.filter(w => w.state == WorkerState.ALIVE))
      val aliveCount = aliveWorkers.size
      // Round-robin cursor shared by all drivers in this scheduling pass.
      var cursor = 0

      // Iterate over a snapshot, because launched drivers are removed from waitingDrivers.
      waitingDrivers.toList.foreach { driver =>
        var placed = false
        var attempts = 0 // number of workers already examined for this driver
        while (!placed && attempts < aliveCount) {
          val candidate = aliveWorkers(cursor)
          attempts += 1
          val fitsMemory = candidate.memoryFree >= driver.desc.mem
          val fitsCores = candidate.coresFree >= driver.desc.cores
          if (fitsMemory && fitsCores) {
            // The worker can host the driver: launch it and drop it from the waiting queue.
            launchDriver(candidate, driver)
            waitingDrivers -= driver
            placed = true
          }
          // Advance to the next worker, wrapping around at the end.
          cursor = (cursor + 1) % aliveCount
        }
      }

      // With the drivers handled, hand out executors to the waiting applications.
      startExecutorsOnWorkers()
    }
  }

schedule()

startExecutorsOnWorkers()

  /**
   * Schedules and launches executors on workers.
   *
   * Applications are served in queue order (simple FIFO): the first waiting application
   * is fitted in before the second one is considered, and so on. Only applications that
   * still have cores left to be granted are looked at.
   */
  private def startExecutorsOnWorkers(): Unit = {
    waitingApps.foreach { app =>
      if (app.coresLeft > 0) {
        // Cores requested per executor, if configured for this application.
        val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
        val coresNeeded = coresPerExecutor.getOrElse(1)
        val memoryNeeded = app.desc.memoryPerExecutorMB

        // Keep workers that are ALIVE and have enough free memory and cores for at least
        // one executor, ordered by free cores from most to least.
        val usableWorkers = workers.toArray
          .filter { worker =>
            worker.state == WorkerState.ALIVE &&
              worker.memoryFree >= memoryNeeded &&
              worker.coresFree >= coresNeeded
          }
          .sortBy(_.coresFree)
          .reverse

        // Number of cores to hand out on each usable worker (indexed like usableWorkers).
        val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

        // Launch executors on every worker that received at least one core.
        usableWorkers.indices.foreach { pos =>
          if (assignedCores(pos) > 0) {
            allocateWorkerResourceToExecutors(
              app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
          }
        }
      }
    }
  }

schedule() —> startExecutorsOnWorkers()

scheduleExecutorsOnWorkers()

  /**
   * Decides how many cores of each usable worker are assigned to the given application.
   *
   * Two placement modes exist. With `spreadOutApps` the executors of an application are
   * spread over as many workers as possible; without it they are packed onto as few
   * workers as possible. The original note states that spreading out is usually better
   * for data locality.
   *
   * The number of cores per executor is configurable. When it is set, several executors
   * of the same application may end up on one worker, provided the worker has enough
   * cores and memory. When it is not set, at most one executor is counted per worker and
   * that executor keeps receiving cores one at a time.
   *
   * Cores are handed out in whole-executor steps rather than one core at a time. Example
   * from the original note: 4 workers with 16 cores each, 3 executors of 16 cores
   * requested. Assigning one core per round would leave 12 cores on each worker, and
   * since 12 < 16 no executor could be launched.
   *
   * @param app           application to schedule executors for
   * @param usableWorkers candidate workers; the result is indexed like this array
   * @param spreadOutApps whether to spread executors across workers
   * @return the number of cores assigned on each worker
   */
  private def scheduleExecutorsOnWorkers(
      app: ApplicationInfo,
      usableWorkers: Array[WorkerInfo],
      spreadOutApps: Boolean): Array[Int] = {
    val coresPerExecutor = app.desc.coresPerExecutor
    // Cores handed out per scheduling step; 1 when no per-executor core count is configured.
    val coreStep = coresPerExecutor.getOrElse(1)
    // Without a configured core count, a worker hosts a single executor that keeps growing.
    val singleExecutorMode = coresPerExecutor.isEmpty
    val memoryPerExecutor = app.desc.memoryPerExecutorMB
    val workerCount = usableWorkers.length
    // Per-worker tallies of cores and executors assigned during this call.
    val assignedCores = new Array[Int](workerCount)
    val assignedExecutors = new Array[Int](workerCount)
    // Budget: what the application still wants, capped by what the workers have free.
    var coresToAssign = math.min(app.coresLeft, usableWorkers.map(_.coresFree).sum)

    /** Returns whether the worker at `pos` can take one more scheduling step for this app. */
    def canLaunchExecutor(pos: Int): Boolean = {
      val worker = usableWorkers(pos)
      // Enough budget left for a step, and enough unassigned cores on this worker.
      val coresOk =
        coresToAssign >= coreStep && worker.coresFree - assignedCores(pos) >= coreStep
      // In single-executor mode a worker that already has its executor only gains cores.
      val growingExistingExecutor = singleExecutorMode && assignedExecutors(pos) != 0
      if (growingExistingExecutor) {
        // Adding cores to an existing executor: memory and executor limit are not re-checked.
        coresOk
      } else {
        // A new executor would be launched: also check memory and the executor limit.
        val memoryAlreadyAssigned = assignedExecutors(pos) * memoryPerExecutor
        val memoryOk = worker.memoryFree - memoryAlreadyAssigned >= memoryPerExecutor
        val belowExecutorLimit =
          assignedExecutors.sum + app.executors.size < app.executorLimit
        coresOk && memoryOk && belowExecutorLimit
      }
    }

    // Workers that can currently accept at least one step.
    var freeWorkers = (0 until workerCount).filter(canLaunchExecutor)
    // Keep assigning until no worker can accept more (budget, resources or limit exhausted).
    while (freeWorkers.nonEmpty) {
      for (pos <- freeWorkers) {
        var stayOnWorker = true
        while (stayOnWorker && canLaunchExecutor(pos)) {
          coresToAssign -= coreStep
          assignedCores(pos) += coreStep
          // Single-executor mode pins the count at 1; otherwise each step is a new executor.
          assignedExecutors(pos) = if (singleExecutorMode) 1 else assignedExecutors(pos) + 1
          // Spreading out: one step per worker per round. Otherwise keep filling this
          // worker until it can take no more.
          stayOnWorker = !spreadOutApps
        }
      }
      // Drop the workers that can no longer accept a step.
      freeWorkers = freeWorkers.filter(canLaunchExecutor)
    }
    assignedCores
  }

schedule() —> startExecutorsOnWorkers()

allocateWorkerResourceToExecutors()

  /**
   * Allocates a worker's resources to one or more executors of an application.
   *
   * When the per-executor core count is configured, the assigned cores are divided evenly
   * into executors of that size. Otherwise a single executor is launched that takes all
   * cores assigned on this worker.
   *
   * @param app              the application the executors belong to
   * @param assignedCores    number of cores on this worker assigned to the application
   * @param coresPerExecutor number of cores per executor, if configured
   * @param worker           the worker to launch on
   */
  private def allocateWorkerResourceToExecutors(
      app: ApplicationInfo,
      assignedCores: Int,
      coresPerExecutor: Option[Int],
      worker: WorkerInfo): Unit = {
    // Number of executors to launch and the cores each of them receives.
    val (executorCount, coresEach) = coresPerExecutor match {
      case Some(perExecutor) => (assignedCores / perExecutor, perExecutor)
      case None => (1, assignedCores)
    }
    (0 until executorCount).foreach { _ =>
      // Record the executor on the application, then launch it on the worker.
      val exec = app.addExecutor(worker, coresEach)
      launchExecutor(worker, exec)
      // The application counts as RUNNING once an executor has been launched for it.
      app.state = ApplicationState.RUNNING
    }
  }

schedule() —> startExecutorsOnWorkers() —> allocateWorkerResourceToExecutors()

addExecutor()
   /**
    * ApplicationInfo.addExecutor(): records a new executor for this application.
    *
    * @param worker worker the executor is placed on
    * @param cores  number of cores granted to the executor
    * @param useID  optional value forwarded to `newExecutorId` when choosing the executor id
    * @return the newly created executor description
    */
   private[master] def addExecutor(
       worker: WorkerInfo,
       cores: Int,
       useID: Option[Int] = None): ExecutorDesc = {
     val executorId = newExecutorId(useID)
     val memoryMb = desc.memoryPerExecutorMB
     val exec = new ExecutorDesc(executorId, this, worker, cores, memoryMb)
     // Register the executor under its id and account for the cores it was granted.
     executors.update(exec.id, exec)
     coresGranted += cores
     exec
   }

schedule() —> startExecutorsOnWorkers() —> allocateWorkerResourceToExecutors()

launchExecutor()
  /**
   * Launches an executor on a worker.
   *
   * The executor is added to the worker, a `LaunchExecutor` message is sent to the
   * worker's endpoint, and an `ExecutorAdded` message is sent to the application's driver.
   *
   * @param worker worker that will run the executor
   * @param exec   description of the executor to launch
   */
  private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
    logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
    worker.addExecutor(exec)

    val application = exec.application
    // Ask the worker to start the executor process.
    val launchMessage = LaunchExecutor(
      masterUrl, application.id, exec.id, application.desc, exec.cores, exec.memory)
    worker.endpoint.send(launchMessage)

    // Tell the application's driver that an executor has been added for it.
    val addedMessage =
      ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory)
    application.driver.send(addedMessage)
  }