jobclient 源码分析

来源:互联网 发布:c语言做图形界面 编辑:程序博客网 时间:2024/05/29 04:47
* Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements.  See the NOTICE file * distributed with this work for additional information * regarding copyright ownership.  The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.hadoop.mapred;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.ByteArrayInputStream;import java.io.DataInputStream;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.net.InetAddress;import java.net.InetSocketAddress;import java.net.URI;import java.net.URISyntaxException;import java.net.URL;import java.net.URLConnection;import java.net.UnknownHostException;import java.security.PrivilegedExceptionAction;import java.util.Arrays;import java.util.Collection;import java.util.Comparator;import java.util.HashMap;import java.util.List;import java.util.Map;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.hadoop.classification.InterfaceAudience;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.filecache.DistributedCache;import org.apache.hadoop.filecache.TrackerDistributedCacheManager;import org.apache.hadoop.fs.FSDataOutputStream;import 
org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.FileUtil;import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.permission.FsPermission;import org.apache.hadoop.hdfs.DFSClient;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.retry.RetryPolicies;import org.apache.hadoop.io.retry.RetryPolicy;import org.apache.hadoop.io.retry.RetryProxy;import org.apache.hadoop.io.retry.RetryUtils;import org.apache.hadoop.ipc.RPC;import org.apache.hadoop.ipc.RemoteException;import org.apache.hadoop.mapred.Counters.Counter;import org.apache.hadoop.mapred.Counters.Group;import org.apache.hadoop.mapred.QueueManager.QueueACL;import org.apache.hadoop.mapreduce.InputFormat;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.JobSubmissionFiles;import org.apache.hadoop.mapreduce.security.TokenCache;import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;import org.apache.hadoop.mapreduce.split.JobSplitWriter;import org.apache.hadoop.net.NetUtils;import org.apache.hadoop.security.AccessControlException;import org.apache.hadoop.security.Credentials;import org.apache.hadoop.security.SecurityUtil;import org.apache.hadoop.security.UserGroupInformation;import org.apache.hadoop.security.authorize.AccessControlList;import org.apache.hadoop.security.token.SecretManager.InvalidToken;import org.apache.hadoop.security.token.Token;import org.apache.hadoop.security.token.TokenRenewer;import org.apache.hadoop.util.ReflectionUtils;import org.apache.hadoop.util.StringUtils;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;import org.codehaus.jackson.JsonParseException;import org.codehaus.jackson.map.JsonMappingException;import org.codehaus.jackson.map.ObjectMapper;/** * <code>JobClient</code> is the primary interface for the user-job to interact * with the {@link JobTracker}. 
* jobclient 是用户job 与jobtracker交互的主要接口。 * 它提供提交作业的功能 并且跟踪job的进度 接受组件任务的报告和log 获得mapreduce集群的状态信息等 * <code>JobClient</code> provides facilities to submit jobs, track their  * progress, access component-tasks' reports/logs, get the Map-Reduce cluster * status information etc. *  * <p>The job submission process involves: * <ol> *   <li> *   Checking the input and output specifications of the job. *   检查指定输入输出文件的有效性 *   </li> *   <li> *   Computing the {@link InputSplit}s for the job. *   计算job的分割文件 *   </li> *   <li> *   Setup the requisite accounting information for the {@link DistributedCache}  *   of the job, if necessary. *   设置job的需要信息 *   </li> *   <li> *   Copying the job's jar and configuration to the map-reduce system directory  *   on the distributed file-system.  *   copy job的jar包和配置文件到mapreduce系统目录 *   </li> *   <li> *   Submitting the job to the <code>JobTracker</code> and optionally monitoring *   it's status. *   提交作业到jobtracker 并且选择性的监控状态 *   </li> * </ol></p> *   * Normally the user creates the application, describes various facets of the * job via {@link JobConf} and then uses the <code>JobClient</code> to submit  * the job and monitor its progress. *  * <p>Here is an example on how to use <code>JobClient</code>:</p> * <p><blockquote><pre> *     // Create a new JobConf *     JobConf job = new JobConf(new Configuration(), MyJob.class); *      *     // Specify various job-specific parameters      *     job.setJobName("myjob"); *      *     job.setInputPath(new Path("in")); *     job.setOutputPath(new Path("out")); *      *     job.setMapperClass(MyJob.MyMapper.class); *     job.setReducerClass(MyJob.MyReducer.class); * *     // Submit the job, then poll for progress until the job is complete *     JobClient.runJob(job); * </pre></blockquote></p> *  * <h4 id="JobControl">Job Control</h4> *  * <p>At times clients would chain map-reduce jobs to accomplish complex tasks  * which cannot be done via a single map-reduce job. 
 * This is fairly easy since
 * the output of the job, typically, goes to distributed file-system and that
 * can be used as the input for the next job.</p>
 *
 * <p>However, this also means that the onus on ensuring jobs are complete
 * (success/failure) lies squarely on the clients. In such situations the
 * various job-control options are:
 * <ol>
 *   <li>
 *   {@link #runJob(JobConf)} : submits the job and returns only after
 *   the job has completed.
 *   </li>
 *   <li>
 *   {@link #submitJob(JobConf)} : only submits the job, then poll the
 *   returned handle to the {@link RunningJob} to query status and make
 *   scheduling decisions.
 *   </li>
 *   <li>
 *   {@link JobConf#setJobEndNotificationURI(String)} : setup a notification
 *   on job-completion, thus avoiding polling.
 *   </li>
 * </ol></p>
 *
 * @see JobConf
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 */
public class JobClient extends Configured implements MRConstants, Tool  {
  private static final Log LOG = LogFactory.getLog(JobClient.class);

  /** Filter controlling which task completion events are reported to the client. */
  public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
  private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED;

  // Maximum age (in ms) a cached JobStatus may reach before it is re-fetched
  // from the JobTracker; see NetworkedJob.ensureFreshStatus().
  private static final long MAX_JOBPROFILE_AGE = 1000 * 2;

  static {
    // Load cluster-wide configuration defaults before any JobConf is built.
    Configuration.addDefaultResource("mapred-default.xml");
    Configuration.addDefaultResource("mapred-site.xml");
  }

  /**
   * A NetworkedJob is an implementation of RunningJob.  It holds
   * a JobProfile object to provide some info, and interacts with the
   * remote service to provide certain functionality.
   */
  static class NetworkedJob implements RunningJob {
    private JobSubmissionProtocol jobSubmitClient;
    JobProfile profile;
    JobStatus status;
    long statustime;  // wall-clock time at which `status` was last fetched

    /**
     * We store a JobProfile and a timestamp for when we last
     * acquired the job profile.
If the job is null, then we cannot     * perform any of the tasks, so we throw an exception.     * The job might be null if the JobTracker has completely forgotten     * about the job.  (eg, 24 hours after the job completes.)     */    public NetworkedJob(JobStatus job, JobProfile prof, JobSubmissionProtocol jobSubmitClient) throws IOException {      this.status = job;      this.profile = prof;      this.jobSubmitClient = jobSubmitClient;      if(this.status == null) {        throw new IOException("The Job status cannot be null");      }      if(this.profile == null) {        throw new IOException("The Job profile cannot be null");      }      if(this.jobSubmitClient == null) {        throw new IOException("The Job Submission Protocol cannot be null");      }      this.statustime = System.currentTimeMillis();    }        /**     * Some methods rely on having a recent job profile object.  Refresh     * it, if necessary     */    synchronized void ensureFreshStatus() throws IOException {      if (System.currentTimeMillis() - statustime > MAX_JOBPROFILE_AGE) {        updateStatus();      }    }        /** Some methods need to update status immediately. So, refresh     * immediately     * @throws IOException     */    synchronized void updateStatus() throws IOException {      this.status = jobSubmitClient.getJobStatus(profile.getJobID());      if(this.status == null) {        throw new IOException("The job appears to have been removed.");       }      this.statustime = System.currentTimeMillis();    }    /**     * An identifier for the job     */    public JobID getID() {      return profile.getJobID();    }        /** @deprecated This method is deprecated and will be removed. 
Applications should      * rather use {@link #getID()}.*/    @Deprecated    public String getJobID() {      return profile.getJobID().toString();    }        /**     * The user-specified job name     */    public String getJobName() {      return profile.getJobName();    }    /**     * The name of the job file     */    public String getJobFile() {      return profile.getJobFile();    }    /**     * A URL where the job's status can be seen     */    public String getTrackingURL() {      return profile.getURL().toString();    }    /**     * A float between 0.0 and 1.0, indicating the % of map work     * completed.     */    public float mapProgress() throws IOException {      ensureFreshStatus();      return status.mapProgress();    }