Spark源码分析-worker
来源:互联网 发布:淘宝网电脑端描述 编辑:程序博客网 时间:2024/05/16 06:23
Worker接收LaunchDriver()消息的源码
// Worker message handler: the Master asks this worker to launch a driver.
// NOTE(review): fragment of the Worker actor's receive block (Spark 1.x, Akka-based).
case LaunchDriver(driverId, driverDesc) => {
  logInfo(s"Asked to launch driver $driverId")
  // 1. Create a DriverRunner that will manage the driver process.
  val driver = new DriverRunner(
    conf,
    driverId,
    workDir,
    sparkHome,
    // Rewrite the command's SSL-related settings before handing it to the runner.
    driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
    self,
    akkaUrl)
  // 2. Track the runner in the worker's in-memory driver map.
  drivers(driverId) = driver
  // 3. Start the runner (spawns a thread that launches the driver process).
  driver.start()
  // 4. Account for the cores and memory this driver reserves on the worker.
  coresUsed += driverDesc.cores
  memoryUsed += driverDesc.mem
}
start()源码
/**
 * Starts a dedicated thread that prepares the working directory, downloads the
 * user jar, launches the driver process, and finally reports the driver's
 * terminal state back to the worker actor.
 */
def start() = {
  // 3.1 Run the whole launch sequence on its own thread so the caller is not blocked.
  new Thread("DriverRunner for " + driverId) {
    override def run() {
      try {
        // 3.2 Create this driver's working directory.
        val driverDir = createWorkingDirectory()
        // 3.3 Download the user-submitted jar into the working directory.
        val localJarFilename = downloadUserJar(driverDir)
        // Substitute placeholder arguments in the driver command.
        def substituteVariables(argument: String): String = argument match {
          case "{{WORKER_URL}}" => workerUrl
          case "{{USER_JAR}}" => localJarFilename
          case other => other
        }
        // TODO: If we add ability to submit multiple jars they should also be added here
        // 3.4 Build the ProcessBuilder from the driver command, requested memory, etc.
        val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
          sparkHome.getAbsolutePath, substituteVariables)
        // 3.5 Launch the driver process (retried when supervise is set).
        launchDriver(builder, driverDir, driverDesc.supervise)
      }
      catch {
        // Remember the failure; it is folded into the final state below.
        case e: Exception => finalException = Some(e)
      }
      // 3.6 Derive the driver's terminal state from how the process ended.
      val state =
        if (killed) { DriverState.KILLED }
        else if (finalException.isDefined) { DriverState.ERROR }
        else {
          finalExitCode match {
            case Some(0) => DriverState.FINISHED
            case _ => DriverState.FAILED
          }
        }
      finalState = Some(state)
      // 3.7 Notify the worker actor of the state change.
      worker ! DriverStateChanged(driverId, state, finalException)
    }
  }.start()
}
createWorkingDirectory()源码
/**
 * Ensures this driver's working directory exists under the worker's workDir
 * and returns it.
 *
 * @throws IOException if the directory is absent and cannot be created
 */
private def createWorkingDirectory(): File = {
  val dir = new File(workDir, driverId)
  // mkdirs() is only attempted when the directory does not already exist.
  if (dir.exists() || dir.mkdirs()) {
    dir
  } else {
    throw new IOException("Failed to create directory " + dir)
  }
}
downloadUserJar()源码
/**
 * Copies the user's application jar from its (possibly remote, e.g. HDFS)
 * location into the driver's working directory.
 *
 * @param driverDir the driver's local working directory
 * @return the absolute local path of the downloaded jar
 * @throws Exception if the jar is still missing after the copy attempt
 */
private def downloadUserJar(driverDir: File): String = {
  // Resolve the jar URL through Hadoop's FileSystem abstraction.
  val jarPath = new Path(driverDesc.jarUrl)
  val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
  val jarFileSystem = jarPath.getFileSystem(hadoopConf)

  // Local destination: <driverDir>/<jar file name>.
  val jarFileName = jarPath.getName
  val destPath = new File(driverDir.getAbsolutePath, jarFileName)
  val localJarFile = new File(driverDir, jarFileName)
  val localJarFilename = localJarFile.getAbsolutePath

  // May already exist if running multiple workers on one node — only copy when absent.
  if (!localJarFile.exists()) {
    logInfo(s"Copying user jar $jarPath to $destPath")
    FileUtil.copy(jarFileSystem, jarPath, destPath, false, hadoopConf)
  }

  // Verify copy succeeded; if the jar is still missing, fail the launch loudly.
  if (!localJarFile.exists()) {
    throw new Exception(s"Did not see expected jar $jarFileName in $driverDir")
  }

  localJarFilename
}
launchDriver()源码
/**
 * Configures output redirection for the driver process and launches it,
 * retrying on failure when `supervise` is set.
 *
 * @param builder   prepared ProcessBuilder for the driver command
 * @param baseDir   working directory for the process and its log files
 * @param supervise whether to re-launch the driver if it exits abnormally
 */
private def launchDriver(builder: ProcessBuilder, baseDir: File, supervise: Boolean) {
  builder.directory(baseDir)
  // Callback invoked right after the process starts, while holding the runner's lock.
  def initialize(process: Process) = {
    // Redirect stdout and stderr to files
    // 3.5.1 Pipe the child's output streams into stdout/stderr files under baseDir.
    val stdout = new File(baseDir, "stdout")
    CommandUtils.redirectStream(process.getInputStream, stdout)
    val stderr = new File(baseDir, "stderr")
    // Prepend the exact quoted launch command to stderr to aid debugging.
    val header = "Launch Command: %s\n%s\n\n".format(
      builder.command.mkString("\"", "\" \"", "\""), "=" * 40)
    Files.append(header, stderr, UTF_8)
    CommandUtils.redirectStream(process.getErrorStream, stderr)
  }
  // 3.5.2 Start the driver process via the retry loop.
  runCommandWithRetry(ProcessBuilderLike(builder), initialize, supervise)
}
runCommandWithRetry()源码
/**
 * Runs the driver command, re-launching it with exponential back-off while
 * `supervise` is set, the process exits non-zero, and the runner has not been killed.
 *
 * @param command    the command to start (wrapped for testability)
 * @param initialize callback run after each start, under this runner's lock
 * @param supervise  whether to retry after abnormal exits
 */
private[deploy] def runCommandWithRetry(command: ProcessBuilderLike,
    initialize: Process => Unit, supervise: Boolean) {
  // Time to wait between submission retries.
  var waitSeconds = 1
  // A run of this many seconds resets the exponential back-off.
  val successfulRunDuration = 5

  var keepTrying = !killed

  while (keepTrying) {
    logInfo("Launch Command: " + command.command.mkString("\"", "\" \"", "\""))

    synchronized {
      // Re-check `killed` under the lock so a concurrent kill wins the race
      // and we never start a process after being told to stop.
      if (killed) { return }
      process = Some(command.start())
      initialize(process.get)
    }

    val processStart = clock.getTimeMillis()
    // Block until the driver process exits.
    val exitCode = process.get.waitFor()
    // A run longer than successfulRunDuration seconds resets the back-off delay.
    if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000) {
      waitSeconds = 1
    }

    if (supervise && exitCode != 0 && !killed) {
      logInfo(s"Command exited with status $exitCode, re-launching after $waitSeconds s.")
      sleeper.sleep(waitSeconds)
      waitSeconds = waitSeconds * 2 // exponential back-off
    }

    keepTrying = supervise && exitCode != 0 && !killed
    finalExitCode = Some(exitCode)
  }
}
// NOTE(review): stray extra closing brace in the original excerpt —
// presumably the end of the enclosing DriverRunner class; verify against the full file.
}
Worker接收DriverStateChanged()源码
// Worker handler: a DriverRunner reports that its driver reached a terminal state.
// NOTE(review): fragment of the Worker actor's receive block.
case DriverStateChanged(driverId, state, exception) => {
  // 1. Log according to the driver's terminal state.
  state match {
    case DriverState.ERROR =>
      logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }
  // 2. Forward the state change to the master.
  master ! DriverStateChanged(driverId, state, exception)
  // 3. Move the runner from the active map to the finished map.
  val driver = drivers.remove(driverId).get
  finishedDrivers(driverId) = driver
  // 4. Release the memory and cores the driver was holding.
  memoryUsed -= driver.driverDesc.mem
  coresUsed -= driver.driverDesc.cores
}
LaunchExecutor()源码
// Worker handler: the Master asks this worker to launch an executor for an application.
// NOTE(review): fragment of the Worker actor's receive block.
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
  // Only honor requests from the currently active master (guards against stale masters).
  if (masterUrl != activeMasterUrl) {
    logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
  } else {
    try {
      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

      // Create the executor's working directory: <workDir>/<appId>/<execId>.
      val executorDir = new File(workDir, appId + "/" + execId)
      // Fail fast when the directory cannot be created.
      if (!executorDir.mkdirs()) {
        throw new IOException("Failed to create directory " + executorDir)
      }

      // Create local dirs for the executor. These are passed to the executor via the
      // SPARK_LOCAL_DIRS environment variable, and deleted by the Worker when the
      // application finishes. Reuse the app's dirs if another executor already created them.
      val appLocalDirs = appDirectories.get(appId).getOrElse {
        Utils.getOrCreateLocalRootDirs(conf).map { dir =>
          Utils.createDirectory(dir).getAbsolutePath()
        }.toSeq
      }
      appDirectories(appId) = appLocalDirs

      // Create an ExecutorRunner to manage this executor's process.
      val manager = new ExecutorRunner(
        appId,
        execId,
        appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
        cores_,
        memory_,
        self,
        workerId,
        host,
        webUi.boundPort,
        publicAddress,
        sparkHome,
        executorDir,
        akkaUrl,
        conf,
        appLocalDirs,
        ExecutorState.LOADING)
      // Register the runner in the worker's in-memory executor map.
      executors(appId + "/" + execId) = manager
      // Start the executor's runner thread.
      manager.start()
      // Account for the resources this executor reserves, then tell the master.
      coresUsed += cores_
      memoryUsed += memory_
      master ! ExecutorStateChanged(appId, execId, manager.state, None, None)
    } catch {
      case e: Exception => {
        logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
        // Undo the registration and kill the runner if it was already tracked.
        if (executors.contains(appId + "/" + execId)) {
          executors(appId + "/" + execId).kill()
          executors -= appId + "/" + execId
        }
        // Report the failure to the master.
        master ! ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
          Some(e.toString), None)
      }
    }
  }
manager.start()中start()方法源码
/**
 * Spawns the worker thread that fetches and runs the executor, and registers a
 * JVM shutdown hook so the executor process is killed when the worker shuts down.
 */
def start() {
  // Run fetchAndRunExecutor on its own thread so the worker actor is not blocked.
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run() { fetchAndRunExecutor() }
  }
  workerThread.start()
  // Shutdown hook that kills actors on shutdown.
  shutdownHook = new Thread() {
    override def run() {
      killProcess(Some("Worker shutting down"))
    }
  }
  Runtime.getRuntime.addShutdownHook(shutdownHook)
}
fetchAndRunExecutor()源码
/**
 * Builds the executor's launch command, starts the process with its output
 * redirected to files, waits for it to exit, and reports the resulting state
 * to the worker actor.
 */
def fetchAndRunExecutor() {
  try {
    // Launch the process
    // Build the ProcessBuilder from the application command and requested memory.
    val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory,
      sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    logInfo("Launch command: " + command.mkString("\"", "\" \"", "\""))

    // Working directory and environment for the child process.
    builder.directory(executorDir)
    builder.environment.put("SPARK_LOCAL_DIRS", appLocalDirs.mkString(","))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "0")

    // Add webUI log urls
    val baseUrl =
      s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
    builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
    builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

    // Start the executor process.
    process = builder.start()
    val header = "Spark Executor Command: %s\n%s\n\n".format(
      command.mkString("\"", "\" \"", "\""), "=" * 40)

    // Redirect its stdout and stderr to files
    val stdout = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, conf)
    val stderr = new File(executorDir, "stderr")
    // Write the launch-command header first so the log starts with context.
    Files.write(header, stderr, UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, conf)

    // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
    // or with nonzero exit code
    val exitCode = process.waitFor()
    // Record the terminal state and notify the worker actor.
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    worker ! ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))
  } catch {
    case interrupted: InterruptedException => {
      // Interrupt means the runner was asked to stop — mark killed and tear down.
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    }
    case e: Exception => {
      // Any other failure marks the executor FAILED and kills the process.
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
    }
  }
}
Worker接收到ExecutorStateChanged()的源码
// Worker handler: an ExecutorRunner reports that its executor changed state.
// NOTE(review): fragment of the Worker actor's receive block.
case ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
  // Forward the state change to the master.
  master ! ExecutorStateChanged(appId, execId, state, message, exitStatus)
  val fullId = appId + "/" + execId
  // Only clean up when the executor reached a terminal (finished) state.
  if (ExecutorState.isFinished(state)) {
    executors.get(fullId) match {
      case Some(executor) =>
        logInfo("Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
        // Move the runner from the active map to the finished map.
        executors -= fullId
        finishedExecutors(fullId) = executor
        // Release the cores and memory the executor was holding.
        coresUsed -= executor.cores
        memoryUsed -= executor.memory
      case None =>
        // State change for an executor this worker no longer tracks — log only.
        logInfo("Unknown Executor " + fullId + " finished with state " + state +
          message.map(" message " + _).getOrElse("") +
          exitStatus.map(" exitStatus " + _).getOrElse(""))
    }
    // Possibly clean up the application's per-worker resources (see maybeCleanupApplication).
    maybeCleanupApplication(appId)
  }
阅读全文
0 0
- Spark源码分析-worker
- Spark源码分析之Worker
- Spark源码分析之Worker
- Spark源码分析之Worker
- 源码-Spark中Worker源码分析(一)
- 源码- Spark中Worker源码分析(二)
- spark源码学习(三)---worker源码分析-worker启动driver、executor分析
- Spark分析之Worker
- Spark的Master和Worker集群启动的源码分析
- spark源码分析--Master和worker建立连接
- spark core源码分析4 worker启动流程
- spark core源码分析4 worker启动流程
- spark源码分析Master与Worker启动流程篇
- spark 1.6.0 core源码分析4 worker启动流程
- Spark集群启动之Master、Worker启动流程源码分析
- Spark源码分析之worker节点启动driver和executor
- Spark源码分析之Worker启动通信机制
- 5.Worker源码分析
- Select服务器
- LeetCode Two Sum 之 JavaScript 多种解法
- set中常用的方法
- Java基本数据类型
- 【leveldb】Leveldb实现原理分析(上)
- Spark源码分析-worker
- 手机验证码接收注册新账户
- 链表排序-LintCode
- Redux介绍
- 什么是中科呐喊Pro?中科呐喊Pro有哪些功能?
- 简单LinuxC程序关于实现进制转换
- mysql查询库中所有的表名,mysql查询指定表中的所有字段名及其相关信息
- Vysor pro 1.7.9 破解
- awk(1) awk中的函数