Spark task launch

After the worker receives the LaunchTask message it starts the task. Taking local mode as an example, the relevant code is shown below (in local mode the backend actor reacts to a ReviveOffers message); the task itself is launched by the executor.
override def receiveWithLogging = {
  case ReviveOffers =>
    reviveOffers()
  ......

def reviveOffers() {
  val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores))
  val tasks = scheduler.resourceOffers(offers).flatten
  // launch each offered task in turn
  for (task <- tasks) {
    freeCores -= scheduler.CPUS_PER_TASK
    executor.launchTask(executorBackend, taskId = task.taskId, attemptNumber = task.attemptNumber,
      task.name, task.serializedTask)
  }
  if (tasks.isEmpty && scheduler.activeTaskSets.nonEmpty) {
    // Try to reviveOffer after 1 second, because scheduler may wait for locality timeout
    context.system.scheduler.scheduleOnce(1000 millis, self, ReviveOffers)
  }
}
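The key bookkeeping here is that each launched task consumes scheduler.CPUS_PER_TASK of the backend's free cores, so no further tasks are launched once the cores are used up. Below is a minimal, self-contained sketch of that loop (not Spark source; FakeTask and the core counts are invented for illustration, and CPUS_PER_TASK is assumed to be the default of 1):

object ReviveOffersSketch {
  // Hypothetical stand-in for Spark's TaskDescription.
  case class FakeTask(taskId: Long, name: String)

  val CPUS_PER_TASK = 1 // assumed default, i.e. spark.task.cpus = 1

  def main(args: Array[String]): Unit = {
    var freeCores = 2
    val offeredTasks = Seq(FakeTask(0, "task 0"), FakeTask(1, "task 1"), FakeTask(2, "task 2"))

    // Launch tasks while free cores remain, mirroring the for-loop in reviveOffers above.
    for (task <- offeredTasks if freeCores >= CPUS_PER_TASK) {
      freeCores -= CPUS_PER_TASK
      println(s"launching ${task.name} (TID ${task.taskId}); freeCores now $freeCores")
    }
  }
}

Running it launches only the first two tasks because the third offer finds no free cores left, which is the situation in which reviveOffers schedules a retry one second later.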

launchTask wraps the task in a TaskRunner and submits it to the thread pool for execution; at this point the task is still in serialized form.
def launchTask(
    context: ExecutorBackend,
    taskId: Long,
    attemptNumber: Int,
    taskName: String,
    serializedTask: ByteBuffer) {
  val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName,
    serializedTask)
  runningTasks.put(taskId, tr)
  threadPool.execute(tr)
}
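A minimal sketch of this wrap-and-submit pattern using only the JDK's thread pool: the work is wrapped in a Runnable, recorded in a map keyed by taskId (mirroring runningTasks above), and handed to a pool for execution. All names here are illustrative, not Spark's actual classes:

import java.nio.ByteBuffer
import java.util.concurrent.{ConcurrentHashMap, Executors, TimeUnit}

object LaunchTaskSketch {
  private val threadPool = Executors.newCachedThreadPool()
  private val runningTasks = new ConcurrentHashMap[Long, Runnable]()

  // serializedTask stands in for the ByteBuffer Spark ships around; the sketch only reports its size.
  def launchTask(taskId: Long, taskName: String, serializedTask: ByteBuffer): Unit = {
    val tr = new Runnable {
      override def run(): Unit = {
        println(s"running $taskName (TID $taskId), ${serializedTask.limit()} bytes of task body")
        runningTasks.remove(taskId)
      }
    }
    runningTasks.put(taskId, tr)
    threadPool.execute(tr)
  }

  def main(args: Array[String]): Unit = {
    launchTask(0, "task 0", ByteBuffer.allocate(16))
    threadPool.shutdown()
    threadPool.awaitTermination(5, TimeUnit.SECONDS)
  }
}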
TaskRunner is an execution unit that implements Runnable. After it is placed in the thread pool it first deserializes the task and then executes it, reporting the task's state as RUNNING.
override def run() {
  val deserializeStartTime = System.currentTimeMillis()
  Thread.currentThread.setContextClassLoader(replClassLoader)
  val ser = env.closureSerializer.newInstance()
  logInfo(s"Running $taskName (TID $taskId)")
  execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
  var taskStart: Long = 0
  startGCTime = gcTime
  try {
    // deserialize the task
    val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask)
    updateDependencies(taskFiles, taskJars)
    task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)

    // If this task has been killed before we deserialized it, let's quit now. Otherwise,
    // continue executing the task.
    if (killed) {
      // Throw an exception rather than returning, because returning within a try{} block
      // causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
      // exception will be caught by the catch block, leading to an incorrect ExceptionFailure
      // for the task.
      throw new TaskKilledException
    }

    attemptedTask = Some(task)
    logDebug("Task " + taskId + "'s epoch is " + task.epoch)
    env.mapOutputTracker.updateEpoch(task.epoch)

    // run the task
    taskStart = System.currentTimeMillis()
    val value = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber)
    val taskFinish = System.currentTimeMillis()

    // If the task has been killed, let's fail it.
    if (task.killed) {
      throw new TaskKilledException
    }

    ......

    val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull)
    val serializedDirectResult = ser.serialize(directResult)
    val resultSize = serializedDirectResult.limit

    // directSend = sending directly back to the driver
    val serializedResult = {
      if (maxResultSize > 0 && resultSize > maxResultSize) {
        logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " +
          s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " +
          s"dropping it.")
        ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))
      } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {
        val blockId = TaskResultBlockId(taskId)
        env.blockManager.putBytes(
          blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)
        logInfo(
          s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)")
        ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))
      } else {
        logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")
        serializedDirectResult
      }
    }

    execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)
  } catch {
    .....
  } finally {
    // Release memory used by this thread for shuffles
    env.shuffleMemoryManager.releaseMemoryForThisThread()
    // Release memory used by this thread for unrolling blocks
    env.blockManager.memoryStore.releaseUnrollMemoryForThisThread()
    // Release memory used by this thread for accumulators
    Accumulators.clear()
    runningTasks.remove(taskId)
  }
}
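Stripped of Spark's bookkeeping, run() follows a simple lifecycle: report RUNNING, deserialize the task bytes, execute the task, serialize the result, and report FINISHED. The rough sketch below walks through that lifecycle with plain Java serialization; FakeTask and the helper serializer are illustrative stand-ins, not Spark's closureSerializer or DirectTaskResult:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

// Illustrative stand-in for a Spark Task: a serializable unit of work.
class FakeTask(val input: Int) extends Serializable {
  def run(): Int = input * 2
}

object TaskRunnerSketch {
  def serialize(obj: AnyRef): Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject(obj)
    oos.close()
    bos.toByteArray
  }

  def deserialize[T](bytes: Array[Byte]): T =
    new ObjectInputStream(new ByteArrayInputStream(bytes)).readObject().asInstanceOf[T]

  def main(args: Array[String]): Unit = {
    val serializedTask = serialize(new FakeTask(21))     // what the scheduler would hand over
    println("statusUpdate: RUNNING")                     // execBackend.statusUpdate(..., RUNNING, ...)
    val task = deserialize[FakeTask](serializedTask)     // deserialize the task body
    val value = task.run()                               // run the task
    val serializedResult = serialize(Int.box(value))     // serialize the result for the driver
    println(s"statusUpdate: FINISHED (${serializedResult.length} bytes result)")
  }
}

The real code additionally decides, based on the result size, whether to send the result to the driver directly, route it through the BlockManager, or drop it when it exceeds maxResultSize.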

