JobClient source-code analysis
(Annotated walkthrough of Hadoop's org.apache.hadoop.mapred.JobClient; originally republished from the web on a programming blog.)
* Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */package org.apache.hadoop.mapred;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.ByteArrayInputStream;import java.io.DataInputStream;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.OutputStream;import java.io.OutputStreamWriter;import java.net.InetAddress;import java.net.InetSocketAddress;import java.net.URI;import java.net.URISyntaxException;import java.net.URL;import java.net.URLConnection;import java.net.UnknownHostException;import java.security.PrivilegedExceptionAction;import java.util.Arrays;import java.util.Collection;import java.util.Comparator;import java.util.HashMap;import java.util.List;import java.util.Map;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.hadoop.classification.InterfaceAudience;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.filecache.DistributedCache;import org.apache.hadoop.filecache.TrackerDistributedCacheManager;import org.apache.hadoop.fs.FSDataOutputStream;import 
org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.FileUtil;import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.permission.FsPermission;import org.apache.hadoop.hdfs.DFSClient;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.retry.RetryPolicies;import org.apache.hadoop.io.retry.RetryPolicy;import org.apache.hadoop.io.retry.RetryProxy;import org.apache.hadoop.io.retry.RetryUtils;import org.apache.hadoop.ipc.RPC;import org.apache.hadoop.ipc.RemoteException;import org.apache.hadoop.mapred.Counters.Counter;import org.apache.hadoop.mapred.Counters.Group;import org.apache.hadoop.mapred.QueueManager.QueueACL;import org.apache.hadoop.mapreduce.InputFormat;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.JobSubmissionFiles;import org.apache.hadoop.mapreduce.security.TokenCache;import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;import org.apache.hadoop.mapreduce.split.JobSplitWriter;import org.apache.hadoop.net.NetUtils;import org.apache.hadoop.security.AccessControlException;import org.apache.hadoop.security.Credentials;import org.apache.hadoop.security.SecurityUtil;import org.apache.hadoop.security.UserGroupInformation;import org.apache.hadoop.security.authorize.AccessControlList;import org.apache.hadoop.security.token.SecretManager.InvalidToken;import org.apache.hadoop.security.token.Token;import org.apache.hadoop.security.token.TokenRenewer;import org.apache.hadoop.util.ReflectionUtils;import org.apache.hadoop.util.StringUtils;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;import org.codehaus.jackson.JsonParseException;import org.codehaus.jackson.map.JsonMappingException;import org.codehaus.jackson.map.ObjectMapper;/** * <code>JobClient</code> is the primary interface for the user-job to interact * with the {@link JobTracker}. 
 * <code>JobClient</code> provides facilities to submit jobs, track their
 * progress, access component-tasks' reports/logs, get the Map-Reduce cluster
 * status information etc.
 *
 * <p>The job submission process involves:
 * <ol>
 *   <li>
 *   Checking the input and output specifications of the job.
 *   </li>
 *   <li>
 *   Computing the {@link InputSplit}s for the job.
 *   </li>
 *   <li>
 *   Setup the requisite accounting information for the {@link DistributedCache}
 *   of the job, if necessary.
 *   </li>
 *   <li>
 *   Copying the job's jar and configuration to the map-reduce system directory
 *   on the distributed file-system.
 *   </li>
 *   <li>
 *   Submitting the job to the <code>JobTracker</code> and optionally
 *   monitoring it's status.
 *   </li>
 * </ol></p>
 *
 * Normally the user creates the application, describes various facets of the
 * job via {@link JobConf} and then uses the <code>JobClient</code> to submit
 * the job and monitor its progress.
 *
 * <p>Here is an example on how to use <code>JobClient</code>:</p>
 * <p><blockquote><pre>
 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *
 *     // Specify various job-specific parameters
 *     job.setJobName("myjob");
 *
 *     job.setInputPath(new Path("in"));
 *     job.setOutputPath(new Path("out"));
 *
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     JobClient.runJob(job);
 * </pre></blockquote></p>
 *
 * <h4 id="JobControl">Job Control</h4>
 *
 * <p>At times clients would chain map-reduce jobs to accomplish complex tasks
 * which cannot be done via a single map-reduce job. This is fairly easy since
 * the output of the job, typically, goes to distributed file-system and that
 * can be used as the input for the next job.</p>
 *
 * <p>However, this also means that the onus on ensuring jobs are complete
 * (success/failure) lies squarely on the clients. In such situations the
 * various job-control options are:
 * <ol>
 *   <li>
 *   {@link #runJob(JobConf)} : submits the job and returns only after
 *   the job has completed.
 *   </li>
 *   <li>
 *   {@link #submitJob(JobConf)} : only submits the job, then poll the
 *   returned handle to the {@link RunningJob} to query status and make
 *   scheduling decisions.
 *   </li>
 *   <li>
 *   {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
 *   upon job-completion, thus avoiding polling.
 *   </li>
 * </ol></p>
 *
 * @see JobConf
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 */
public class JobClient extends Configured implements MRConstants, Tool {

  private static final Log LOG = LogFactory.getLog(JobClient.class);

  // Which categories of task events the client reports; defaults to
  // surfacing only FAILED tasks.
  public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
  private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED;

  // A cached job status older than this (2 seconds) is considered stale
  // and re-fetched from the JobTracker; see NetworkedJob.ensureFreshStatus().
  private static final long MAX_JOBPROFILE_AGE = 1000 * 2;

  static {
    // Pull the cluster's MapReduce settings into every Configuration.
    Configuration.addDefaultResource("mapred-default.xml");
    Configuration.addDefaultResource("mapred-site.xml");
  }

  /**
   * A NetworkedJob is an implementation of RunningJob.  It holds
   * a JobProfile object to provide some info, and interacts with the
   * remote service to provide certain functionality.
   */
  static class NetworkedJob implements RunningJob {
    private JobSubmissionProtocol jobSubmitClient; // RPC proxy to the JobTracker
    JobProfile profile;   // static job info: id, name, job file, tracking URL
    JobStatus status;     // last status snapshot fetched from the JobTracker
    long statustime;      // wall-clock time when 'status' was last refreshed

    /**
     * We store a JobProfile and a timestamp for when we last
     * acquired the job profile.  If the job is null, then we cannot
     * perform any of the tasks, so we throw an exception.
* The job might be null if the JobTracker has completely forgotten * about the job. (eg, 24 hours after the job completes.) */ public NetworkedJob(JobStatus job, JobProfile prof, JobSubmissionProtocol jobSubmitClient) throws IOException { this.status = job; this.profile = prof; this.jobSubmitClient = jobSubmitClient; if(this.status == null) { throw new IOException("The Job status cannot be null"); } if(this.profile == null) { throw new IOException("The Job profile cannot be null"); } if(this.jobSubmitClient == null) { throw new IOException("The Job Submission Protocol cannot be null"); } this.statustime = System.currentTimeMillis(); } /** * Some methods rely on having a recent job profile object. Refresh * it, if necessary */ synchronized void ensureFreshStatus() throws IOException { if (System.currentTimeMillis() - statustime > MAX_JOBPROFILE_AGE) { updateStatus(); } } /** Some methods need to update status immediately. So, refresh * immediately * @throws IOException */ synchronized void updateStatus() throws IOException { this.status = jobSubmitClient.getJobStatus(profile.getJobID()); if(this.status == null) { throw new IOException("The job appears to have been removed."); } this.statustime = System.currentTimeMillis(); } /** * An identifier for the job */ public JobID getID() { return profile.getJobID(); } /** @deprecated This method is deprecated and will be removed. Applications should * rather use {@link #getID()}.*/ @Deprecated public String getJobID() { return profile.getJobID().toString(); } /** * The user-specified job name */ public String getJobName() { return profile.getJobName(); } /** * The name of the job file */ public String getJobFile() { return profile.getJobFile(); } /** * A URL where the job's status can be seen */ public String getTrackingURL() { return profile.getURL().toString(); } /** * A float between 0.0 and 1.0, indicating the % of map work * completed. 
*/ public float mapProgress() throws IOException { ensureFreshStatus(); return status.mapProgress(); }