spark task启动
来源:互联网 发布:防伪税控开票软件 编辑:程序博客网 时间:2024/06/05 01:07
worker接收到LaunchTask消息后会启动task,以local模式为例,代码如下。task是由executor来启动的
launchTask会将task封装为TaskRunner,放入线程池中运行,此时task是被序列化的
// Excerpt from LocalActor (Spark local mode): the Akka message handler.
// On a ReviveOffers message it calls reviveOffers() to try to launch pending
// tasks; the remaining cases of the partial function were elided ("......")
// by the article, so this fragment is intentionally incomplete.
override def receiveWithLogging = { case ReviveOffers => reviveOffers() ......
/**
 * Offers this local executor's free cores to the TaskScheduler and launches
 * whatever task descriptions the scheduler hands back.
 *
 * If nothing could be scheduled while task sets are still active (e.g. the
 * scheduler is waiting for a locality timeout to expire), a ReviveOffers
 * message is re-sent to self after one second so scheduling is retried.
 */
def reviveOffers() {
  val singleOffer = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores))
  val launchable = scheduler.resourceOffers(singleOffer).flatten
  // Launch each accepted task, charging its CPU cost against freeCores first.
  launchable.foreach { td =>
    freeCores -= scheduler.CPUS_PER_TASK
    executor.launchTask(executorBackend, taskId = td.taskId,
      attemptNumber = td.attemptNumber, td.name, td.serializedTask)
  }
  if (launchable.isEmpty && scheduler.activeTaskSets.nonEmpty) {
    // Retry shortly: the scheduler may simply be holding out for better locality.
    context.system.scheduler.scheduleOnce(1000 millis, self, ReviveOffers)
  }
}
launchTask会将task封装为TaskRunner,放入线程池中运行,此时task是被序列化的
// Executor.launchTask: wraps the still-serialized task bytes in a TaskRunner
// (a Runnable), registers it under its taskId in runningTasks, then submits it
// to the executor's thread pool for asynchronous execution.
// NOTE: the trailing Chinese sentence fused onto this line is article prose,
// not code — it says the TaskRunner first deserializes the task, then executes
// it and reports the task's state as RUNNING.
def launchTask( context: ExecutorBackend, taskId: Long, attemptNumber: Int, taskName: String, serializedTask: ByteBuffer) { val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName, serializedTask) runningTasks.put(taskId, tr) threadPool.execute(tr)}TaskRunner是一个实现Runnable的线程执行体,被放入线程池后续先进性反序列化,然后执行该task,汇报task状态为running
// TaskRunner.run, as excerpted by the article ("......" / "....." mark code
// elided from the original Spark source). Visible flow:
//   1. report RUNNING to the backend via execBackend.statusUpdate;
//   2. deserialize the task: Task.deserializeWithDependencies splits the bytes
//      into (files, jars, task bytes), dependencies are fetched via
//      updateDependencies, then the Task itself is deserialized with the
//      closure serializer; a kill observed here throws TaskKilledException
//      (throwing, not returning, to avoid NonLocalReturnControl being caught
//      as a spurious ExceptionFailure);
//   3. run the task (task.run) and serialize a DirectTaskResult;
//   4. pick the result route by size: larger than maxResultSize -> dropped and
//      replaced by an IndirectTaskResult stub; at least the Akka frame size
//      (minus reserved bytes) -> stored in the BlockManager and referenced by
//      blockId; otherwise sent directly to the driver;
//   5. report FINISHED; the finally block releases shuffle memory, unroll
//      memory, and accumulators, and removes the task from runningTasks.
override def run() { val deserializeStartTime = System.currentTimeMillis() Thread.currentThread.setContextClassLoader(replClassLoader) val ser = env.closureSerializer.newInstance() logInfo(s"Running $taskName (TID $taskId)") execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var taskStart: Long = 0 startGCTime = gcTime try { //反序列化 val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask) updateDependencies(taskFiles, taskJars) task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) // If this task has been killed before we deserialized it, let's quit now. Otherwise, // continue executing the task. if (killed) { // Throw an exception rather than returning, because returning within a try{} block // causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl // exception will be caught by the catch block, leading to an incorrect ExceptionFailure // for the task. throw new TaskKilledException } attemptedTask = Some(task) logDebug("Task " + taskId + "'s epoch is " + task.epoch) env.mapOutputTracker.updateEpoch(task.epoch) // 开始执行task taskStart = System.currentTimeMillis() val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber) val taskFinish = System.currentTimeMillis() // If the task has been killed, let's fail it. if (task.killed) { throw new TaskKilledException } ...... val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull) val serializedDirectResult = ser.serialize(directResult) val resultSize = serializedDirectResult.limit // directSend = sending directly back to the driver val serializedResult = { if (maxResultSize > 0 && resultSize > maxResultSize) { logWarning(s"Finished $taskName (TID $taskId). 
Result is larger than maxResultSize " + s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + s"dropping it.") ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize)) } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) { val blockId = TaskResultBlockId(taskId) env.blockManager.putBytes( blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER) logInfo( s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)") ser.serialize(new IndirectTaskResult[Any](blockId, resultSize)) } else { logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver") serializedDirectResult } } execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) } catch { ..... } finally { // Release memory used by this thread for shuffles env.shuffleMemoryManager.releaseMemoryForThisThread() // Release memory used by this thread for unrolling blocks env.blockManager.memoryStore.releaseUnrollMemoryForThisThread() // Release memory used by this thread for accumulators Accumulators.clear() runningTasks.remove(taskId) } }}
0 0
- spark task启动
- spark源码学习(八)--- executor启动task分析
- Spark-task相关
- spark中的task 分割
- Spark Task执行原理
- [spark] Task执行流程
- spark出现task org.apache.spark.SparkException: Task not serializable
- Spark源码阅读笔记:Spark的Task
- spark源码-stage和 task
- spark Task序列化问题
- spark job, stage ,task介绍。
- spark lost task 异常 笔记
- spark job, stage, task介绍
- spark work task 源码分析
- spark task 任务状态管理
- Spark源码分析之Task
- Spark启动
- spark启动
- retrofit2.0转换String 报错修复
- java.net.BindException: Cannot assign requested address解决之道
- Servlet:JSP理解
- linux非专业常用命令
- 参数 of jQuery.ajax()
- spark task启动
- Linux设备驱动之mmap设备操作
- C同学的工作笔记 mac下android ndk环境搭建
- 北京第五代公厕投入使用 有WiFi有充电桩
- hibernate入门(一)
- 关于using namespace std
- 中文字符处理encodeURI编码与decodeURI解码
- java单例模式的7种写法
- Java Url请求方式中文乱码问题解决办法