spark源码学习(二)---Master源码分析(2)-master内组件状态改变机制

来源:互联网 发布:linux常用软件包 编辑:程序博客网 时间:2024/05/19 19:30

本篇学习master里面几个组件状态改变时发生的事情,自己写自己看,不对之处请拍砖。


首先,来看driver的状态改变:

    case DriverStateChanged(driverId, state, exception) => {      state match {        //如果driver的状态是ERROR,Finished,killed,或者Failed,那么移除该driver信息        case DriverState.ERROR | DriverState.FINISHED | DriverState.KILLED | DriverState.FAILED =>          removeDriver(driverId, state, exception)        case _ =>          throw new Exception(s"Received unexpected state update for driver $driverId: $state")      }    }

当driver的状态编程上面的状态的时候,会执行remover driver,remove的操作基本上就是移除内存中的driver,worker,并修改状态信息的操作,remove的代码如下:

  private def removeDriver(      driverId: String,      finalState: DriverState,      exception: Option[Exception]) {    //首先,查看是否存在该driver,如果不存在日志警告;如果存在该driver,将drivers中的该driver移除    drivers.find(d => d.id == driverId) match {      case Some(driver) =>        logInfo(s"Removing driver: $driverId")        //从set中移除该driver        drivers -= driver                if (completedDrivers.size >= RETAINED_DRIVERS) {          val toRemove = math.max(RETAINED_DRIVERS / 10, 1)          completedDrivers.trimStart(toRemove)        }        //将移除的driver加入到完成的driver记录容器中        completedDrivers += driver        //移除driver的持久化信息        persistenceEngine.removeDriver(driver)        //更改driver的状态        driver.state = finalState        driver.exception = exception        //移除该driver对应的worker        driver.worker.foreach(w => w.removeDriver(driver))        schedule()      case None =>        logWarning(s"Asked to remove unknown driver: $driverId")    }  }}

可以发现master类中RequestKillDriver方法:master收到要求kill掉该driver的时候会触发driver状态改变   

case RequestKillDriver(driverId) => {      if (state != RecoveryState.ALIVE) {        val msg = s"${Utils.BACKUP_STANDALONE_MASTER_PREFIX}: $state. " +          s"Can only kill drivers in ALIVE state."        context.reply(KillDriverResponse(self, driverId, success = false, msg))      } else {        logInfo("Asked to kill driver " + driverId)        val driver = drivers.find(_.id == driverId)        driver match {          case Some(d) =>            if (waitingDrivers.contains(d)) {              waitingDrivers -= d              self.send(DriverStateChanged(driverId, DriverState.KILLED, None))            } else {              // We just notify the worker to kill the driver here. The final bookkeeping occurs              // on the return path when the worker submits a state change back to the master              // to notify it that the driver was successfully killed.              d.worker.foreach { w =>                w.endpoint.send(KillDriver(driverId))              }            }            // TODO: It would be nice for this to be a synchronous response            val msg = s"Kill request for $driverId submitted"            logInfo(msg)            context.reply(KillDriverResponse(self, driverId, success = true, msg))          case None =>            val msg = s"Driver $driverId has already finished or does not exist"            logWarning(msg)            context.reply(KillDriverResponse(self, driverId, success = false, msg))        }      }    }


下面分析executor的状态改变:

    case ExecutorStateChanged(appId, execId, state, message, exitStatus) => {      //找到exexutor对应的application,通过获取executor的信息      val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))            execOption match {      //如果找到了该executor        case Some(exec) => {          val appInfo = idToApp(appId)          //修改状态为当前的新状态          val oldState = exec.state          exec.state = state          //如果该executor正在运行,那么,重新设置APP的重试次数          if (state == ExecutorState.RUNNING) {            assert(oldState == ExecutorState.LAUNCHING,              s"executor $execId state transfer from $oldState to RUNNING is illegal")            appInfo.resetRetryCount()          }          //向driver发送executor的更新信息          exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus))          //如果executor状态是finished,则从worker和APP中移除该driver          if (ExecutorState.isFinished(state)) {            // Remove this executor from the worker and app            logInfo(s"Removing executor ${exec.fullId} because it is $state")            // If an application has already finished, preserve its            // state to display its information properly on the UI            // 如果AP没有完成,需要保持状态以便将信息正确输出到ui,那么不从APP中移除executor,只移除worker中的executor            if (!appInfo.isFinished) {              appInfo.removeExecutor(exec)            }            exec.worker.removeExecutor(exec)                        val normalExit = exitStatus == Some(0)            // Only retry certain number of times so we don't go into an infinite loop.            //如果executor退出状态是非正常的            if (!normalExit) {              //如果没有达到app重试次数最大值,那么会重新执行调度Schedule方法              if (appInfo.incrementRetryCount() < ApplicationState.MAX_NUM_RETRY) {                schedule()              } else {                //否则 进行removeApplication操作                val execs = appInfo.executors.values                if (!execs.exists(_.state == ExecutorState.RUNNING)) {                  logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +                    s"${appInfo.retryCount} times; removing it")                  removeApplication(appInfo, ApplicationState.FAILED)                }              }            }          }        }        case None =>          logWarning(s"Got status update for unknown executor $appId/$execId")      }    }















0 0
原创粉丝点击