Hadoop Job Submission Source Code Analysis

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1); // submit the job (true/false: whether to print job progress)
  }
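For reference, TokenizerMapper and IntSumReducer used by the driver above are the mapper/combiner/reducer classes of the standard Hadoop WordCount example; a minimal sketch of them (they are not part of the submission path analyzed below):

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      // tokenize each input line and emit (word, 1) pairs
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context
                       ) throws IOException, InterruptedException {
      // sum all the counts emitted for the same word
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }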
=============

  /**
   * Submit the job to the cluster and wait for it to finish.
   * @param verbose print the progress to the user
   * @return true if the job succeeded
   * @throws IOException thrown if the communication with the
   *         <code>JobTracker</code> is lost
   */
  public boolean waitForCompletion(boolean verbose
                                   ) throws IOException, InterruptedException,
                                            ClassNotFoundException {
    if (state == JobState.DEFINE) {   // first check the current job state: public static enum JobState {DEFINE, RUNNING}
      submit();  // submit the job
    }
    if (verbose) {
      monitorAndPrintJob();    // print job progress to the user
    } else {
      // get the completion poll interval from the client.
      int completionPollIntervalMillis =
        Job.getCompletionPollInterval(cluster.getConf());
      while (!isComplete()) {
        try {
          Thread.sleep(completionPollIntervalMillis);
        } catch (InterruptedException ie) {
        }
      }
    }
    return isSuccessful();
  }
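In the non-verbose branch, the poll interval comes from the client configuration via Job.getCompletionPollInterval(). Assuming the key is mapreduce.client.completion.pollinterval (default 5000 ms in Hadoop 2.x), a driver could shorten it like this:

    Configuration conf = new Configuration();
    // assumed config key; defaults to 5000 ms when unset
    conf.setInt("mapreduce.client.completion.pollinterval", 1000); // poll once per second
    Job job = Job.getInstance(conf, "word count");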
=============

  /**
   * Submit the job to the cluster and return immediately.
   * @throws IOException
   */
  public void submit()
         throws IOException, InterruptedException, ClassNotFoundException {
    ensureState(JobState.DEFINE);    // verify the job is still in the DEFINE state
    setUseNewAPI();    // select which API to use; by default the new API (mapreduce.*) is used instead of the old one (mapred.*)
    connect();    // connect to the cluster described by the configuration
    final JobSubmitter submitter =
        getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
    status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
      public JobStatus run() throws IOException, InterruptedException,
      ClassNotFoundException {
        return submitter.submitJobInternal(Job.this, cluster);
 /*
The job submission process involves:

1. Checking the input and output specifications of the job.
2. Computing the InputSplits for the job.
3. Setting up the requisite accounting information for the DistributedCache of the job, if necessary.
4. Copying the job's jar and configuration to the map-reduce system directory on the distributed file-system.
5. Submitting the job to the JobTracker and optionally monitoring its status.
*/

      }
    });
    state = JobState.RUNNING;    // set the job state to RUNNING
    LOG.info("The url to track the job: " + getTrackingURL());
   }
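Since submit() returns as soon as the job has been handed off, a driver can also do its own monitoring instead of blocking in waitForCompletion(). A rough sketch using the public Job progress methods (the enclosing method is assumed to declare throws Exception):

    job.submit();                        // returns immediately; the job state is now RUNNING
    while (!job.isComplete()) {          // poll the cluster for completion
      System.out.printf("map %.0f%%  reduce %.0f%%%n",
          job.mapProgress() * 100, job.reduceProgress() * 100);
      Thread.sleep(5000);
    }
    System.out.println(job.isSuccessful() ? "job succeeded" : "job failed");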
=============
  private synchronized void connect()
          throws IOException, InterruptedException, ClassNotFoundException {
    if (cluster == null) {
        // obtain the Cluster instance via UserGroupInformation.doAs(); the user's permissions are checked first.
      cluster = ugi.doAs(new PrivilegedExceptionAction<Cluster>() { 
                   public Cluster run()
                          throws IOException, InterruptedException,
                                 ClassNotFoundException {
                     return new Cluster(getConfiguration()); // initialize the Cluster instance from the configuration
                   }
                 });
    }
  }
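The doAs() call is the standard UserGroupInformation pattern: whatever runs inside PrivilegedExceptionAction.run() executes with the identity (and permissions) of the wrapped user. A minimal standalone sketch of the same pattern, here using an HDFS FileSystem as the protected resource (illustrative only, not part of Job):

    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    FileSystem fs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      public FileSystem run() throws IOException {
        // everything inside run() is performed as the wrapped user
        return FileSystem.get(new Configuration());
      }
    });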
=============

  public Cluster(InetSocketAddress jobTrackAddr, Configuration conf)
      throws IOException {
    this.conf = conf;
    this.ugi = UserGroupInformation.getCurrentUser();
    initialize(jobTrackAddr, conf); // initialize the ClientProtocol used for communication between the job client and the JobTracker
  }
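The same class also offers a single-argument constructor, which is effectively the path taken by connect() above: it simply delegates with a null JobTracker address.

  public Cluster(Configuration conf) throws IOException {
    this(null, conf);
  }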
=============
// Initialize the ClientProtocol used for communication between the job client and the JobTracker.
  private void initialize(InetSocketAddress jobTrackAddr, Configuration conf)
      throws IOException {

    synchronized (frameworkLoader) {
      for (ClientProtocolProvider provider : frameworkLoader) {
        LOG.debug("Trying ClientProtocolProvider : "
            + provider.getClass().getName());
        ClientProtocol clientProtocol = null;
        try {
          if (jobTrackAddr == null) {
            clientProtocol = provider.create(conf);
          } else {
            clientProtocol = provider.create(jobTrackAddr, conf);
          }

          if (clientProtocol != null) {
            clientProtocolProvider = provider;
            client = clientProtocol;
            LOG.debug("Picked " + provider.getClass().getName()
                + " as the ClientProtocolProvider");
            break;
          }
          else {
            LOG.debug("Cannot pick " + provider.getClass().getName()
                + " as the ClientProtocolProvider - returned null protocol");
          }
        }
        catch (Exception e) {
          LOG.info("Failed to use " + provider.getClass().getName()
              + " due to error: ", e);
        }
      }
    }

    if (null == clientProtocolProvider || null == client) {
      throw new IOException(
          "Cannot initialize Cluster. Please check your configuration for "
              + MRConfig.FRAMEWORK_NAME
              + " and the correspond server addresses.");
    }
  }
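The providers iterated over here are discovered through the frameworkLoader ServiceLoader; which one returns a non-null ClientProtocol is driven by MRConfig.FRAMEWORK_NAME, i.e. mapreduce.framework.name. In Hadoop 2.x, "local" selects the LocalClientProtocolProvider (in-process LocalJobRunner) and "yarn" selects the YarnClientProtocolProvider. Normally this is set in mapred-site.xml, but a client could force the choice programmatically, roughly:

    Configuration conf = new Configuration();
    // "yarn" -> YarnClientProtocolProvider, "local" -> LocalClientProtocolProvider
    conf.set(MRConfig.FRAMEWORK_NAME, "yarn");   // mapreduce.framework.name
    Cluster cluster = new Cluster(conf);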

 

                                             