Hadoop JobTracker之ExpireLaunchingTasks

来源:互联网 发布:程序员用的浏览器 编辑:程序博客网 时间:2024/06/05 06:31
ExpireLaunchingTasks为JobTracker的后台线程,该线程记录了task的分配(启动)时间。每当TaskTracker发送心跳时,JobTracker会分配合适的任务给TaskTracker,并且将TaskAttemptID和分配时间以键值对的形式记录到launchingTasks集合中。待后续心跳中JT得知该任务已经启动(不再处于待启动状态),则会将其从launchingTasks集合中删除;如果超过TASKTRACKER_EXPIRY_INTERVAL仍未收到该任务的状态报告,则将该任务标记为启动失败并从集合中移除。我们可以把这个数据结构看作"已分配但尚未确认启动"的任务集合。
private class ExpireLaunchingTasks implements Runnable {    /**     * This is a map of the tasks that have been assigned to task trackers,     * but that have not yet been seen in a status report.     * map: task-id -> time-assigned      * 注释原文写的很明白了,该集合记录是已经分配,但还没反馈信息的任务     */    private Map<TaskAttemptID, Long> launchingTasks =      new LinkedHashMap<TaskAttemptID, Long>();          public void run() {      while (true) {        try {          // 大约每三分钟检测一次,相关参数:mapred.tasktracker.expiry.interval          Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL/3);          long now = clock.getTime();          if(LOG.isDebugEnabled()) {            LOG.debug("Starting launching task sweep");          }          synchronized (JobTracker.this) {            synchronized (launchingTasks) {            //遍历任务集合              Iterator<Map.Entry<TaskAttemptID, Long>> itr =                launchingTasks.entrySet().iterator();              while (itr.hasNext()) {                Map.Entry<TaskAttemptID, Long> pair = itr.next();                TaskAttemptID taskId = pair.getKey();                //计算启动耗费时间                long age = now - (pair.getValue()).longValue();                LOG.info(taskId + " is " + age + " ms debug.");                //如果超时,则标记任务失败                if (age > TASKTRACKER_EXPIRY_INTERVAL) {                  LOG.info("Launching task " + taskId + " timed out.");                  TaskInProgress tip = null;                  tip = taskidToTIPMap.get(taskId);                  if (tip != null) {                    JobInProgress job = tip.getJob();                    String trackerName = getAssignedTracker(taskId);                    TaskTrackerStatus trackerStatus =                       getTaskTrackerStatus(trackerName);                                           // This might happen when the tasktracker has already                    // expired and this thread tries to call failedtask                    // again. 
expire tasktracker should have called failed                    // task!                    if (trackerStatus != null)                      job.failedTask(tip, taskId, "Error launching task",                                      tip.isMapTask()? TaskStatus.Phase.MAP:                                     TaskStatus.Phase.STARTING,                                     TaskStatus.State.FAILED,                                     trackerName);                  }                  //从集合中删除                  itr.remove();                } else {                  // the tasks are sorted by start time, so once we find                  // one that we want to keep, we are done for this cycle.                  break;                }              }            }          }        } catch (InterruptedException ie) {          // all done          break;        } catch (Exception e) {          LOG.error("Expire Launching Task Thread got exception: " +                    StringUtils.stringifyException(e));        }      }    }    //在JT返回心跳时,如果成功分配了任务则将任务信息记录    public void addNewTask(TaskAttemptID taskName) {      synchronized (launchingTasks) {        launchingTasks.put(taskName,                            clock.getTime());      }    }    //同上,在任务变为非启动状态后,从集合中删除    public void removeTask(TaskAttemptID taskName) {      synchronized (launchingTasks) {        launchingTasks.remove(taskName);      }    }  }
记录任务启动失败的操作如下:
   /**   * Fail a task with a given reason, but without a status object.   *    * Assuming {@link JobTracker} is locked on entry.   *    * @param tip The task's tip   * @param taskid The task id   * @param reason The reason that the task failed   * @param trackerName The task tracker the task failed on   */  public void failedTask(TaskInProgress tip, TaskAttemptID taskid, String reason,                          TaskStatus.Phase phase, TaskStatus.State state,                          String trackerName) {    TaskStatus status = TaskStatus.createTaskStatus(tip.isMapTask(),                                                     taskid,                                                    0.0f,                                                    tip.isMapTask() ?                                                         numSlotsPerMap :                                                         numSlotsPerReduce,                                                    state,                                                    reason,                                                    reason,                                                    trackerName, phase,                                                    new Counters());    // update the actual start-time of the attempt    TaskStatus oldStatus = tip.getTaskStatus(taskid);     long startTime = oldStatus == null                     ? jobtracker.getClock().getTime()                     : oldStatus.getStartTime();    status.setStartTime(startTime);    status.setFinishTime(jobtracker.getClock().getTime());    boolean wasComplete = tip.isComplete();    updateTaskStatus(tip, status);    boolean isComplete = tip.isComplete();    if (wasComplete && !isComplete) { // mark a successful tip as failed      String taskType = getTaskType(tip);      JobHistory.Task.logFailed(tip.getTIPId(), taskType,                                 tip.getExecFinishTime(), reason, taskid);    }  }


原创粉丝点击