Hadoop Job Submission Source Code Analysis

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1); // submit the job (true/false: whether to print job progress)
  }
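For reference, TokenizerMapper and IntSumReducer used by the driver above are the mapper/combiner/reducer classes of the standard Hadoop WordCount example; a minimal sketch of them (they are not part of the submission path analyzed below):

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      // tokenize each input line and emit (word, 1) pairs
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context
                       ) throws IOException, InterruptedException {
      // sum all the counts emitted for the same word
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }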
=============

  /**
   * Submit the job to the cluster and wait for it to finish.
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         <code>JobTracker</code> is lost
   */
  public boolean waitForCompletion(boolean verbose
                                   ) throws IOException, InterruptedException,
                                            ClassNotFoundException {
    if (state == JobState.DEFINE) {   // first check the current job state: public static enum JobState {DEFINE, RUNNING}
      submit();  // submit the job
    }
    if (verbose) {
      monitorAndPrintJob();    // print job progress to the user
    } else {
      // get the completion poll interval from the client.
      int completionPollIntervalMillis =
        Job.getCompletionPollInterval(cluster.getConf());
      while (!isComplete()) {
        try {
          Thread.sleep(completionPollIntervalMillis);
        } catch (InterruptedException ie) {
        }
      }
    }
    return isSuccessful();
  }
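In the non-verbose branch, the poll interval comes from the client configuration via Job.getCompletionPollInterval(). Assuming the key is mapreduce.client.completion.pollinterval (default 5000 ms in Hadoop 2.x), a driver could shorten it like this:

    Configuration conf = new Configuration();
    // assumed config key; defaults to 5000 ms when unset
    conf.setInt("mapreduce.client.completion.pollinterval", 1000); // poll once per second
    Job job = Job.getInstance(conf, "word count");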
=============

  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit()
         throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(JobState.DEFINE);    // verify the job is still in the DEFINE state
    setUseNewAPI();    // select which API to use; by default the new API (mapreduce.*) is used instead of the old one (mapred.*)
    connect();    // connect to the cluster described by the configuration
    final JobSubmitter submitter =
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException,
      ClassNotFoundException {
        return submitter.submitJobInternal(Job.this, cluster);
 /*
The job submission process involves:

1. Checking the input and output specifications of the job.
2. Computing the InputSplits for the job.
3. Setting up the requisite accounting information for the DistributedCache of the job, if necessary.
4. Copying the job's jar and configuration to the map-reduce system directory on the distributed file-system.
5. Submitting the job to the JobTracker and optionally monitoring its status.
*/

      }
    });
    state = JobState.RUNNING;    // set the job state to RUNNING
    LOG.info("The url to track the job: " + getTrackingURL());
   }
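Since submit() returns as soon as the job has been handed off, a driver can also do its own monitoring instead of blocking in waitForCompletion(). A rough sketch using the public Job progress methods (the enclosing method is assumed to declare throws Exception):

    job.submit();                        // returns immediately; the job state is now RUNNING
    while (!job.isComplete()) {          // poll the cluster for completion
      System.out.printf("map %.0f%%  reduce %.0f%%%n",
          job.mapProgress() * 100, job.reduceProgress() * 100);
      Thread.sleep(5000);
    }
    System.out.println(job.isSuccessful() ? "job succeeded" : "job failed");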
=============
  private synchronized void connect()
          throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster == null) {
        // obtain the Cluster instance via UserGroupInformation.doAs(); the user's permissions are checked first.
      cluster = ugi.doAs(new PrivilegedExceptionAction<Cluster>() { 
                   public Cluster run()
                          throws IOException, InterruptedException,
                                 ClassNotFoundException {
                     return new Cluster(getConfiguration()); // initialize the Cluster instance from the configuration
                   }
                 });
    }
  }
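The doAs() call is the standard UserGroupInformation pattern: whatever runs inside PrivilegedExceptionAction.run() executes with the identity (and permissions) of the wrapped user. A minimal standalone sketch of the same pattern, here using an HDFS FileSystem as the protected resource (illustrative only, not part of Job):

    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      public FileSystem run() throws IOException {
        // everything inside run() is performed as the wrapped user
        return FileSystem.get(new Configuration());
      }
    });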
=============

  public Cluster(InetSocketAddress jobTrackAddr, Configuration conf)
      throws IOException {
    this.conf = conf;
    this.ugi = UserGroupInformation.getCurrentUser();
    initialize(jobTrackAddr, conf); // initialize the ClientProtocol used for communication between the job client and the JobTracker
  }
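The same class also offers a single-argument constructor, which is effectively the path taken by connect() above: it simply delegates with a null JobTracker address.

  public Cluster(Configuration conf) throws IOException {
    this(null, conf);
  }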
=============
// Initialize the ClientProtocol used for communication between the job client and the JobTracker.
  private void initialize(InetSocketAddress jobTrackAddr, Configuration conf)
      throws IOException {

    synchronized (frameworkLoader) {
      for (ClientProtocolProvider provider : frameworkLoader) {
        LOG.debug("Trying ClientProtocolProvider : "
            + provider.getClass().getName());
        ClientProtocol clientProtocol = null;
        try {
          if (jobTrackAddr == null) {
            clientProtocol = provider.create(conf);
          } else {
            clientProtocol = provider.create(jobTrackAddr, conf);
          }

          if (clientProtocol != null) {
            clientProtocolProvider = provider;
            client = clientProtocol;
            LOG.debug("Picked " + provider.getClass().getName()
                + " as the ClientProtocolProvider");
            break;
          }
          else {
            LOG.debug("Cannot pick " + provider.getClass().getName()
                + " as the ClientProtocolProvider - returned null protocol");
          }
        }
        catch (Exception e) {
          LOG.info("Failed to use " + provider.getClass().getName()
              + " due to error: ", e);
        }
      }
    }

    if (null == clientProtocolProvider || null == client) {
      throw new IOException(
          "Cannot initialize Cluster. Please check your configuration for "
              + MRConfig.FRAMEWORK_NAME
              + " and the correspond server addresses.");
    }
  }
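The providers iterated over here are discovered through the frameworkLoader ServiceLoader; which one returns a non-null ClientProtocol is driven by MRConfig.FRAMEWORK_NAME, i.e. mapreduce.framework.name. In Hadoop 2.x, "local" selects the LocalClientProtocolProvider (in-process LocalJobRunner) and "yarn" selects the YarnClientProtocolProvider. Normally this is set in mapred-site.xml, but a client could force the choice programmatically, roughly:

    Configuration conf = new Configuration();
    // "yarn" -> YarnClientProtocolProvider, "local" -> LocalClientProtocolProvider
    conf.set(MRConfig.FRAMEWORK_NAME, "yarn");   // mapreduce.framework.name
    Cluster cluster = new Cluster(conf);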

 

                                             