Inside the Spark Runtime: The Start-up and Registration Message Loop


When a SparkContext is instantiated, the following chain of calls registers the application with the Master and brings up its executors:

1. SparkContext calls createTaskScheduler, which creates a TaskSchedulerImpl and a SparkDeploySchedulerBackend; SparkContext then calls TaskSchedulerImpl.start().
2. TaskSchedulerImpl.start() calls SparkDeploySchedulerBackend.start(). That method builds a Command specifying that the entry class of the executors to be launched for the current application is org.apache.spark.executor.CoarseGrainedExecutorBackend, wraps it in an ApplicationDescription, creates an AppClient and calls AppClient.start().
3. AppClient.start() creates a ClientEndpoint; when the ClientEndpoint starts, it registers the current application with the Master via tryRegisterAllMasters (sending RegisterApplication).
4. If the Master can run the application, it assigns it an application ID and allocates compute resources through schedule(); the concrete allocation is determined by the application's deployment mode and its Memory and Cores configuration. The Master then sends LaunchExecutor instructions to the chosen Workers.
5. When a Worker allocates resources for the application, it first creates an ExecutorRunner. The ExecutorRunner starts a thread that uses a ProcessBuilder to launch another JVM process, and the main class loaded by that new JVM is exactly the class named in the Command built in step 2, CoarseGrainedExecutorBackend.
6. The main method of CoarseGrainedExecutorBackend instantiates the CoarseGrainedExecutorBackend message loop itself; during start-up its onStart callback sends RegisterExecutor to the DriverEndpoint to register the current CoarseGrainedExecutorBackend.
7. The DriverEndpoint receives the registration and stores it in the in-memory data structures of the SparkDeploySchedulerBackend instance, so the Driver now owns its compute resources, and it replies with RegisteredExecutor to the CoarseGrainedExecutorBackend.
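
The handshake above can be condensed into the following message sequence. This is a hypothetical, heavily simplified sketch: the message names mirror the real Spark classes (RegisterApplication, RegisteredApplication, LaunchExecutor, RegisterExecutor, RegisteredExecutor), but the case classes and values are illustrative only; the real implementations are walked through step by step below.

object StartupHandshakeSketch {
  sealed trait Msg
  case class RegisterApplication(appDesc: String) extends Msg
  case class RegisteredApplication(appId: String) extends Msg
  case class LaunchExecutor(appId: String, execId: Int) extends Msg
  case class RegisterExecutor(executorId: String) extends Msg
  case object RegisteredExecutor extends Msg

  def main(args: Array[String]): Unit = {
    // Condensed view of the real flow; "app-..." is a placeholder, not a real ID.
    val flow: Seq[(String, Msg)] = Seq(
      "ClientEndpoint -> Master"                       -> RegisterApplication("WordCount"),
      "Master -> ClientEndpoint"                       -> RegisteredApplication("app-..."),
      "Master -> Worker"                               -> LaunchExecutor("app-...", 0),
      "CoarseGrainedExecutorBackend -> DriverEndpoint" -> RegisterExecutor("0"),
      "DriverEndpoint -> CoarseGrainedExecutorBackend" -> RegisteredExecutor
    )
    flow.foreach { case (hop, msg) => println(s"$hop : $msg") }
  }
}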

【User Program】WordCount.scala

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("WordCount")
    conf.setMaster("local") // the program runs locally here, but the analysis below assumes standalone mode
    val sc = new SparkContext(conf)
    val lines = sc.textFile("D://code//scala//WordCount//wordcount.txt")
    lines.flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .map(s => (s._2, s._1))
      .sortByKey(false)
      .map(s => (s._2, s._1))
      .collect()
      .foreach(println)
    sc.stop()
  }
}
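
As a side note, the two map(s => (s._2, s._1)) swaps around sortByKey can be avoided with RDD.sortBy. A hypothetical equivalent of the job above (same placeholder input path as in the original):

import org.apache.spark.{SparkConf, SparkContext}

object WordCountSortBy {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WordCountSortBy").setMaster("local")
    val sc = new SparkContext(conf)
    sc.textFile("D://code//scala//WordCount//wordcount.txt")
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false) // sort by count, descending, without swapping keys and values
      .collect()
      .foreach(println)
    sc.stop()
  }
}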

new SparkContext(conf): initializes the SparkContext

【Source】SparkContext.scala:

// initialization of SparkContext fields
// Create and start the scheduler
val (sched, ts) = SparkContext.createTaskScheduler(this, master)
_schedulerBackend = sched
_taskScheduler = ts
_dagScheduler = new DAGScheduler(this)
_heartbeatReceiver.ask[Boolean](TaskSchedulerIsSet)

// start TaskScheduler after taskScheduler sets DAGScheduler reference in DAGScheduler's
// constructor
_taskScheduler.start()
...
private def createTaskScheduler(
    sc: SparkContext,
    master: String): (SchedulerBackend, TaskScheduler) = {
  ...
  case SPARK_REGEX(sparkUrl) =>
    val scheduler = new TaskSchedulerImpl(sc)
    val masterUrls = sparkUrl.split(",").map("spark://" + _)
    val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
    scheduler.initialize(backend)
    (backend, scheduler)
  ...
}
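
For the standalone analysis below, the master URL has to match the SPARK_REGEX branch above, i.e. it has the spark://... form; several masters (for an HA standby setup) can be listed comma-separated, exactly as the sparkUrl.split(",") call suggests. A minimal, hypothetical configuration (host names and ports are placeholders):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("WordCount")
  // matches SPARK_REGEX; split(",") turns this into two master URLs
  .setMaster("spark://master1:7077,master2:7077")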

scheduler.initialize(backend):

【Source】TaskSchedulerImpl.scala:

def initialize(backend: SchedulerBackend) {
  this.backend = backend
  // temporarily set rootPool name to empty
  rootPool = new Pool("", schedulingMode, 0, 0)
  schedulableBuilder = {
    schedulingMode match {
      case SchedulingMode.FIFO =>
        new FIFOSchedulableBuilder(rootPool)
      case SchedulingMode.FAIR =>
        new FairSchedulableBuilder(rootPool, conf)
    }
  }
  schedulableBuilder.buildPools()
}
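
The schedulingMode used in initialize() comes from the spark.scheduler.mode configuration (FIFO by default). A hypothetical configuration that selects the FairSchedulableBuilder branch (the pool file path is a placeholder):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("WordCount")
  .set("spark.scheduler.mode", "FAIR")                                  // FIFO is the default
  .set("spark.scheduler.allocation.file", "/path/to/fairscheduler.xml") // optional pool definitions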

_taskScheduler.start()

【Source】TaskSchedulerImpl.scala:

override def start() {
  backend.start()

  if (!isLocal && conf.getBoolean("spark.speculation", false)) {
    logInfo("Starting speculative execution thread")
    speculationScheduler.scheduleAtFixedRate(new Runnable {
      override def run(): Unit = Utils.tryOrStopSparkContext(sc) {
        checkSpeculatableTasks()
      }
    }, SPECULATION_INTERVAL_MS, SPECULATION_INTERVAL_MS, TimeUnit.MILLISECONDS)
  }
}
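
The speculative-execution thread above is only started when the application is not local and spark.speculation is enabled. A hypothetical configuration (values are illustrative, not tuned recommendations):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setAppName("WordCount")
  .set("spark.speculation", "true")            // enables the checkSpeculatableTasks loop above
  .set("spark.speculation.interval", "100ms")  // how often to check (SPECULATION_INTERVAL_MS)
  .set("spark.speculation.multiplier", "1.5")  // how much slower than the median before speculating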

backend.start()

【Source】SparkDeploySchedulerBackend.scala

override def start() {
  super.start()
  launcherBackend.connect()

  // The endpoint for executors to talk to us
  val driverUrl = rpcEnv.uriOf(SparkEnv.driverActorSystemName,
    RpcAddress(sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt),
    CoarseGrainedSchedulerBackend.ENDPOINT_NAME)
  val args = Seq(
    "--driver-url", driverUrl,
    "--executor-id", "{{EXECUTOR_ID}}",
    "--hostname", "{{HOSTNAME}}",
    "--cores", "{{CORES}}",
    "--app-id", "{{APP_ID}}",
    "--worker-url", "{{WORKER_URL}}")
  val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions")
    .map(Utils.splitCommandString).getOrElse(Seq.empty)
  val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath")
    .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)
  val libraryPathEntries = sc.conf.getOption("spark.executor.extraLibraryPath")
    .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)

  // When testing, expose the parent class path to the child. This is processed by
  // compute-classpath.{cmd,sh} and makes all needed jars available to child processes
  // when the assembly is built with the "*-provided" profiles enabled.
  val testingClassPath =
    if (sys.props.contains("spark.testing")) {
      sys.props("java.class.path").split(java.io.File.pathSeparator).toSeq
    } else {
      Nil
    }

  // Start executors with a few necessary configs for registering with the scheduler
  val sparkJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf)
  val javaOpts = sparkJavaOpts ++ extraJavaOpts
  val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
    args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts)
  val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("")
  val coresPerExecutor = conf.getOption("spark.executor.cores").map(_.toInt)
  val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory,
    command, appUIAddress, sc.eventLogDir, sc.eventLogCodec, coresPerExecutor)
  client = new AppClient(sc.env.rpcEnv, masters, appDesc, this, conf)
  client.start()
  launcherBackend.setState(SparkAppHandle.State.SUBMITTED)
  waitForRegistration()
  launcherBackend.setState(SparkAppHandle.State.RUNNING)
}

Here the Command specifies that the entry class of the executors to be launched for the current application is org.apache.spark.executor.CoarseGrainedExecutorBackend. The resulting appDesc is then passed as a constructor argument to AppClient; the AppClient object is created and its start method is called.

client.start(): the start method of the AppClient object, which creates the ClientEndpoint

【Source】AppClient.scala

def start() {
  // Just launch an rpcEndpoint; it will call back into the listener.
  endpoint.set(rpcEnv.setupEndpoint("AppClient", new ClientEndpoint(rpcEnv)))
}

new ClientEndpoint(rpcEnv): once the endpoint is registered with the RpcEnv, its ClientEndpoint.onStart() callback is invoked
【Source】AppClient.scala

private class ClientEndpoint(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint
  with Logging {
  ...
  override def onStart(): Unit = {
    try {
      registerWithMaster(1)
    } catch {
      case e: Exception =>
        logWarning("Failed to connect to master", e)
        markDisconnected()
        stop()
    }
  }
  ...
}

registerWithMaster(1): registerWithMaster then calls tryRegisterAllMasters to register the current application with the Master

【Source】AppClient.scala

private def registerWithMaster(nthRetry: Int) {
  registerMasterFutures.set(tryRegisterAllMasters())
  registrationRetryTimer.set(registrationRetryThread.scheduleAtFixedRate(new Runnable {
    override def run(): Unit = {
      Utils.tryOrExit {
        if (registered.get) {
          registerMasterFutures.get.foreach(_.cancel(true))
          registerMasterThreadPool.shutdownNow()
        } else if (nthRetry >= REGISTRATION_RETRIES) {
          markDead("All masters are unresponsive! Giving up.")
        } else {
          registerMasterFutures.get.foreach(_.cancel(true))
          registerWithMaster(nthRetry + 1)
        }
      }
    }
  }, REGISTRATION_TIMEOUT_SECONDS, REGISTRATION_TIMEOUT_SECONDS, TimeUnit.SECONDS))
}
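
For reference, the retry window above is bounded by two constants defined in the Spark 1.x AppClient (exact values may differ in other releases): registration is re-attempted every REGISTRATION_TIMEOUT_SECONDS until it succeeds or REGISTRATION_RETRIES attempts have been used up, at which point markDead gives up.

// Constants as found in the Spark 1.x AppClient (may vary across releases):
private val REGISTRATION_TIMEOUT_SECONDS = 20
private val REGISTRATION_RETRIES = 3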

tryRegisterAllMasters(): sends a RegisterApplication(appDescription, self) message to the Master to register the application

【Source】AppClient.scala

private def tryRegisterAllMasters(): Array[JFuture[_]] = {
  for (masterAddress <- masterRpcAddresses) yield {
    registerMasterThreadPool.submit(new Runnable {
      override def run(): Unit = try {
        if (registered.get) {
          return
        }
        logInfo("Connecting to master " + masterAddress.toSparkURL + "...")
        val masterRef =
          rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME)
        masterRef.send(RegisterApplication(appDescription, self))
      } catch {
        case ie: InterruptedException => // Cancelled
        case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
      }
    })
  }
}

masterRef.send(RegisterApplication(appDescription, self)): when the Master receives the RegisterApplication message and can run the application, it generates an application ID for it and allocates compute resources via schedule(); the concrete allocation is determined by the application's deployment mode and its Memory and Cores configuration.

【Source】Master.scala

override def receive: PartialFunction[Any, Unit] = {
  ...
  case RegisterApplication(description, driver) => {
    // TODO Prevent repeated registrations from some driver
    if (state == RecoveryState.STANDBY) {
      // ignore, don't send response
    } else {
      logInfo("Registering app " + description.name)
      val app = createApplication(description, driver)
      registerApplication(app)
      logInfo("Registered app " + description.name + " with ID " + app.id)
      persistenceEngine.addApplication(app)
      driver.send(RegisteredApplication(app.id, self))
      schedule()
    }
  }
  ...
}
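
createApplication also assigns the application ID mentioned earlier (an application ID, not a job ID). A hedged sketch of how the Master derives it in Spark 1.x, modelled on Master.newApplicationId (details may differ across releases; the example value in the comment is a placeholder):

import java.text.SimpleDateFormat
import java.util.Date

object AppIdSketch {
  private val createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
  private var nextAppNumber = 0

  def newApplicationId(submitDate: Date): String = {
    // e.g. "app-20240518012700-0000"
    val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
    nextAppNumber += 1
    appId
  }
}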

schedule(): the Master performs resource scheduling; it launches any waiting driver on a worker (launchDriver(worker, driver)) and then starts executors on the workers

【Source】Master.scala

private def schedule(): Unit = {
  if (state != RecoveryState.ALIVE) { return }
  // Drivers take strict precedence over executors
  val shuffledWorkers = Random.shuffle(workers) // Randomization helps balance drivers
  for (worker <- shuffledWorkers if worker.state == WorkerState.ALIVE) {
    for (driver <- waitingDrivers) {
      if (worker.memoryFree >= driver.desc.mem && worker.coresFree >= driver.desc.cores) {
        launchDriver(worker, driver)
        waitingDrivers -= driver
      }
    }
  }
  startExecutorsOnWorkers()
}

startExecutorsOnWorkers(): as part of schedule(), the Master decides how many cores to give each worker and starts executors on them (a worked example of the core assignment follows the source below)

【Source】Master.scala

private def startExecutorsOnWorkers(): Unit = {
  // Right now this is a very simple FIFO scheduler. We keep trying to fit in the first app
  // in the queue, then the second app, etc.
  for (app <- waitingApps if app.coresLeft > 0) {
    val coresPerExecutor: Option[Int] = app.desc.coresPerExecutor
    // Filter out workers that don't have enough resources to launch an executor
    val usableWorkers = workers.toArray.filter(_.state == WorkerState.ALIVE)
      .filter(worker => worker.memoryFree >= app.desc.memoryPerExecutorMB &&
        worker.coresFree >= coresPerExecutor.getOrElse(1))
      .sortBy(_.coresFree).reverse
    val assignedCores = scheduleExecutorsOnWorkers(app, usableWorkers, spreadOutApps)

    // Now that we've decided how many cores to allocate on each worker, let's allocate them
    for (pos <- 0 until usableWorkers.length if assignedCores(pos) > 0) {
      allocateWorkerResourceToExecutors(
        app, assignedCores(pos), coresPerExecutor, usableWorkers(pos))
    }
  }
}
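
scheduleExecutorsOnWorkers itself is not shown above, but its effect can be illustrated with a simplified model: when spreadOutApps is true (the default), cores are handed out round-robin across all usable workers; when it is false, each worker is filled up before moving to the next. The sketch below is hypothetical and ignores memoryPerExecutorMB and coresPerExecutor, which the real method also honours.

// Simplified illustration of the spreadOutApps behaviour (not the real algorithm):
def assignCores(coresNeeded: Int, coresFree: Array[Int], spreadOut: Boolean): Array[Int] = {
  val assigned = Array.fill(coresFree.length)(0)
  var left = coresNeeded
  var pos = 0
  while (left > 0 && assigned.zip(coresFree).exists { case (a, f) => a < f }) {
    if (assigned(pos) < coresFree(pos)) {
      assigned(pos) += 1
      left -= 1
      if (spreadOut) pos = (pos + 1) % coresFree.length // spread: move on after every core
    } else {
      pos = (pos + 1) % coresFree.length                // this worker is full, try the next
    }
  }
  assigned
}

// An app that still needs 12 cores, three ALIVE workers with 8 free cores each:
println(assignCores(12, Array(8, 8, 8), spreadOut = true).toSeq)  // Vector(4, 4, 4)
println(assignCores(12, Array(8, 8, 8), spreadOut = false).toSeq) // Vector(8, 4, 0)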

allocateWorkerResourceToExecutors(app, assignedCores(pos), coresPerExecutor, usableWorkers(pos)): once the Master has decided how many cores to allocate on each worker, it divides them into executors and launches those executors on the worker

【Source】Master.scala

private def allocateWorkerResourceToExecutors(
    app: ApplicationInfo,
    assignedCores: Int,
    coresPerExecutor: Option[Int],
    worker: WorkerInfo): Unit = {
  // If the number of cores per executor is specified, we divide the cores assigned
  // to this worker evenly among the executors with no remainder.
  // Otherwise, we launch a single executor that grabs all the assignedCores on this worker.
  val numExecutors = coresPerExecutor.map { assignedCores / _ }.getOrElse(1)
  val coresToAssign = coresPerExecutor.getOrElse(assignedCores)
  for (i <- 1 to numExecutors) {
    val exec = app.addExecutor(worker, coresToAssign)
    launchExecutor(worker, exec)
    app.state = ApplicationState.RUNNING
  }
}
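
A quick worked example of the two lines above: with spark.executor.cores = 2 and 6 cores assigned to this worker, the worker launches 6 / 2 = 3 executors with 2 cores each; with spark.executor.cores unset, it launches a single executor that grabs all 6 cores.

val assignedCores = 6
val withCoresPerExecutor: Option[Int] = Some(2) // spark.executor.cores = 2
val withoutCoresPerExecutor: Option[Int] = None // spark.executor.cores unset

withCoresPerExecutor.map(assignedCores / _).getOrElse(1)    // 3 executors
withCoresPerExecutor.getOrElse(assignedCores)               // 2 cores each

withoutCoresPerExecutor.map(assignedCores / _).getOrElse(1) // 1 executor
withoutCoresPerExecutor.getOrElse(assignedCores)            // 6 cores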

launchExecutor(worker, exec): the Master instructs the Worker to launch an executor and notifies the application's driver with ExecutorAdded

【Source】Master.scala

private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = {
  logInfo("Launching executor " + exec.fullId + " on worker " + worker.id)
  worker.addExecutor(exec)
  worker.endpoint.send(LaunchExecutor(masterUrl,
    exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory))
  exec.application.driver.send(
    ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory))
}

worker.endpoint.send(…): the Worker receives the LaunchExecutor message and first creates an ExecutorRunner

【Source】Worker.scala

override def receive: PartialFunction[Any, Unit] = synchronized {
  ...
  case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
    if (masterUrl != activeMasterUrl) {
      logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
    } else {
      try {
        logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

        // Create the executor's working directory
        val executorDir = new File(workDir, appId + "/" + execId)
        if (!executorDir.mkdirs()) {
          throw new IOException("Failed to create directory " + executorDir)
        }

        // Create local dirs for the executor. These are passed to the executor via the
        // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
        // application finishes.
        val appLocalDirs = appDirectories.get(appId).getOrElse {
          Utils.getOrCreateLocalRootDirs(conf).map { dir =>
            val appDir = Utils.createDirectory(dir, namePrefix = "executor")
            Utils.chmod700(appDir)
            appDir.getAbsolutePath()
          }.toSeq
        }
        appDirectories(appId) = appLocalDirs
        val manager = new ExecutorRunner(
          appId,
          execId,
          appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
          cores_,
          memory_,
          self,
          workerId,
          host,
          webUi.boundPort,
          publicAddress,
          sparkHome,
          executorDir,
          workerUri,
          conf,
          appLocalDirs, ExecutorState.RUNNING)
        executors(appId + "/" + execId) = manager
        manager.start()
        coresUsed += cores_
        memoryUsed += memory_
        sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
      } catch {
        case e: Exception => {
          logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
          if (executors.contains(appId + "/" + execId)) {
            executors(appId + "/" + execId).kill()
            executors -= appId + "/" + execId
          }
          sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
            Some(e.toString), None))
        }
      }
    }
    ...
}

manager.start()

【Source】ExecutorRunner.scala

private[worker] def start() {
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run() { fetchAndRunExecutor() }
  }
  workerThread.start()
  // Shutdown hook that kills actors on shutdown.
  shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    // It's possible that we arrive here before calling `fetchAndRunExecutor`, then `state` will
    // be `ExecutorState.RUNNING`. In this case, we should set `state` to `FAILED`.
    if (state == ExecutorState.RUNNING) {
      state = ExecutorState.FAILED
    }
    killProcess(Some("Worker shutting down")) }
}

fetchAndRunExecutor(): builds the launch command for the application's executor and starts the executor process

【Source】ExecutorRunner.scala

private def fetchAndRunExecutor() {
  try {
    // Launch the process
    val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
      memory, sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")

    builder.directory(executorDir)
    builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")

    // Add webUI log urls
    val baseUrl =
      s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
    builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
    builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

    process = builder.start()
    val header = "Spark Executor Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)

    // Redirect its stdout and stderr to files
    val stdout = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, conf)

    val stderr = new File(executorDir, "stderr")
    Files.write(header, stderr, UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, conf)

    // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
    // or with nonzero exit code
    val exitCode = process.waitFor()
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
  } catch {
    case interrupted: InterruptedException => {
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    }
    case e: Exception => {
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
    }
  }
}

Inside the ExecutorRunner, a dedicated thread uses a ProcessBuilder to launch another JVM process. The main class that this new JVM loads is the class whose name was specified in the Command built by SparkDeploySchedulerBackend, namely org.apache.spark.executor.CoarseGrainedExecutorBackend. When the JVM started through the ProcessBuilder comes up, it loads CoarseGrainedExecutorBackend and invokes its main method, which instantiates the CoarseGrainedExecutorBackend message loop itself.
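
After ExecutorRunner substitutes the {{...}} placeholders and CommandUtils assembles the classpath and JVM options, the command handed to ProcessBuilder looks roughly like the following. Every value here is a made-up placeholder for illustration, not taken from a real run:

// Hypothetical, fully substituted executor launch command (all values are placeholders):
val exampleLaunchCommand = Seq(
  "/usr/lib/jvm/java-8/bin/java",
  "-cp", "/opt/spark/conf:/opt/spark/lib/spark-assembly.jar",
  "-Xms1024M", "-Xmx1024M",
  "org.apache.spark.executor.CoarseGrainedExecutorBackend",
  "--driver-url", "spark://CoarseGrainedScheduler@192.168.1.10:50001",
  "--executor-id", "0",
  "--hostname", "worker1",
  "--cores", "2",
  "--app-id", "app-20240518012700-0000",
  "--worker-url", "spark://Worker@worker1:50002")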

Additional notes:
【Source】ExecutorRunner.scala

val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
  memory, sparkHome.getAbsolutePath, substituteVariables)

This directly calls the buildProcessBuilder method of the CommandUtils object, which builds the local command together with its Spark classpath information.

【Source】CommandUtils.scala

def buildProcessBuilder(
    command: Command,
    securityMgr: SecurityManager,
    memory: Int,
    sparkHome: String,
    substituteArguments: String => String,
    classPaths: Seq[String] = Seq[String](),
    env: Map[String, String] = sys.env): ProcessBuilder = {
  val localCommand = buildLocalCommand(
    command, securityMgr, substituteArguments, classPaths, env)
  val commandSeq = buildCommandSeq(localCommand, memory, sparkHome)
  val builder = new ProcessBuilder(commandSeq: _*)
  val environment = builder.environment()
  for ((key, value) <- localCommand.environment) {
    environment.put(key, value)
  }
  builder
}
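
The substituteArguments function passed in here is ExecutorRunner.substituteVariables, which replaces the {{EXECUTOR_ID}}, {{HOSTNAME}}, etc. placeholders set by SparkDeploySchedulerBackend with this executor's concrete values. In Spark 1.x it looks roughly like the following sketch (field names as in ExecutorRunner; details may differ across releases):

// Sketch of ExecutorRunner.substituteVariables (Spark 1.x):
private[worker] def substituteVariables(argument: String): String = argument match {
  case "{{WORKER_URL}}"  => workerUrl
  case "{{EXECUTOR_ID}}" => execId.toString
  case "{{HOSTNAME}}"    => host
  case "{{CORES}}"       => cores.toString
  case "{{APP_ID}}"      => appId
  case other             => other
}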

builder.start(): ProcessImpl.start() creates a brand-new operating-system process (the separate JVM that will run CoarseGrainedExecutorBackend), not merely a new thread inside the Worker's JVM
【Source】ProcessBuilder.java (JDK)

public Process start() throws IOException {
    // Must convert to array first -- a malicious user-supplied
    // list might try to circumvent the security check.
    String[] cmdarray = command.toArray(new String[command.size()]);
    cmdarray = cmdarray.clone();
    for (String arg : cmdarray)
        if (arg == null)
            throw new NullPointerException();
    // Throws IndexOutOfBoundsException if command is empty
    String prog = cmdarray[0];

    SecurityManager security = System.getSecurityManager();
    if (security != null)
        security.checkExec(prog);

    String dir = directory == null ? null : directory.toString();

    for (int i = 1; i < cmdarray.length; i++) {
        if (cmdarray[i].indexOf('\u0000') >= 0) {
            throw new IOException("invalid null character in command");
        }
    }

    try {
        return ProcessImpl.start(cmdarray,
                                 environment,
                                 dir,
                                 redirects,
                                 redirectErrorStream);
    } catch (IOException | IllegalArgumentException e) {
        String exceptionInfo = ": " + e.getMessage();
        Throwable cause = e;
        if ((e instanceof IOException) && security != null) {
            // Can not disclose the fail reason for read-protected files.
            try {
                security.checkRead(prog);
            } catch (SecurityException se) {
                exceptionInfo = "";
                cause = se;
            }
        }
        // It's much easier for us to create a high-quality error
        // message than the low-level C code which found the problem.
        throw new IOException(
            "Cannot run program \"" + prog + "\""
            + (dir == null ? "" : " (in directory \"" + dir + "\")")
            + exceptionInfo,
            cause);
    }
}