spark core 2.0 Executor Heartbeat

来源:互联网 发布:校园网网络拓扑结构图 编辑:程序博客网 时间:2024/05/22 09:48

Executor 的 heartbeat 机制:调度一个任务,周期性地向 driver 汇报心跳以及当前活跃任务的部分度量信息。

  /**
   * Starts the periodic heartbeat to the driver.
   *
   * Submits a task to `heartbeater` that calls `reportHeartBeat()` at a fixed rate of
   * `spark.executor.heartbeatInterval` (default "10s"). The first run is delayed by one
   * full interval plus a random fraction of an interval.
   */
  private def startDriverHeartbeater(): Unit = {
    val periodMs = conf.getTimeAsMs("spark.executor.heartbeatInterval", "10s")

    // Add random jitter to the first run so that the heartbeats don't end up in sync.
    val jitterMs = (math.random * periodMs).asInstanceOf[Int]
    val firstRunDelayMs = periodMs + jitterMs

    val reporter = new Runnable() {
      override def run(): Unit = Utils.logUncaughtExceptions(reportHeartBeat())
    }
    heartbeater.scheduleAtFixedRate(reporter, firstRunDelayMs, periodMs, TimeUnit.MILLISECONDS)
  }
} // closes an enclosing scope whose opening is outside this excerpt

heartbeater 是一个后台(daemon)单线程的定时调度执行器,它会按固定周期调用 heartbeatTask 这个任务。


  // Scheduled executor obtained from ThreadUtils.newDaemonSingleThreadScheduledExecutor and
  // given the name "driver-heartbeater".
  private val heartbeater = ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-heartbeater")

heartbeatTask仅调用reportHeartBeat方法,如下


/**
 * Sends a single heartbeat, together with the metrics of the running tasks, to the driver.
 *
 * For every entry of `runningTasks` whose task is non-null, the shuffle read metrics are
 * merged, the JVM GC time is set to the GC time elapsed since the runner's `startGCTime`,
 * and the task's accumulators are collected under its task id. The collected updates are
 * sent in a `Heartbeat` message through `heartbeatReceiverRef`.
 *
 * On a successful reply the failure counter is reset, and the block manager is
 * re-registered when the reply asks for it. On a non-fatal error the failure counter is
 * incremented; once it reaches `HEARTBEAT_MAX_FAILURES` the JVM exits with
 * `ExecutorExitCode.HEARTBEAT_FAILURE`.
 */
private def reportHeartBeat(): Unit = {
  // (task id, accumulators) pairs that are sent back to the driver
  val updatesByTask = new ArrayBuffer[(Long, Seq[AccumulatorV2[_, _]])]()
  val totalGcTimeNow = computeTotalGcTime()

  // Runners whose task is still null are skipped.
  for (runner <- runningTasks.values().asScala if runner.task != null) {
    runner.task.metrics.mergeShuffleReadMetrics()
    runner.task.metrics.setJvmGCTime(totalGcTimeNow - runner.startGCTime)
    updatesByTask += ((runner.taskId, runner.task.metrics.accumulators()))
  }

  val heartbeat = Heartbeat(executorId, updatesByTask.toArray, env.blockManager.blockManagerId)
  try {
    // The RPC timeout is read from the same setting as the heartbeat interval.
    val timeout = RpcTimeout(conf, "spark.executor.heartbeatInterval", "10s")
    val reply = heartbeatReceiverRef.askWithRetry[HeartbeatResponse](heartbeat, timeout)
    if (reply.reregisterBlockManager) {
      logInfo("Told to re-register on heartbeat")
      env.blockManager.reregister()
    }
    // A successful round trip clears the failure count.
    heartbeatFailures = 0
  } catch {
    case NonFatal(cause) =>
      logWarning("Issue communicating with driver in heartbeater", cause)
      heartbeatFailures += 1
      if (heartbeatFailures >= HEARTBEAT_MAX_FAILURES) {
        logError(s"Exit as unable to send heartbeats to driver " +
          s"more than $HEARTBEAT_MAX_FAILURES times")
        System.exit(ExecutorExitCode.HEARTBEAT_FAILURE)
      }
  }
}



0 0
原创粉丝点击