MR2 client 代码分析
来源:互联网 发布:nba数据分析 编辑:程序博客网 时间:2024/04/29 19:52
- waitForCompletion
我们先看wordcount的main方法:
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1);//提交mapreduce 计算任务 }
waitForCompletion负责提交mapreduce任务并等待任务结束,其中的submit()最终会调用到后面列出的submitJobInternal:
/** * Submit the job to the cluster and wait for it to finish. * @param verbose print the progress to the user * @return true if the job succeeded * @throws IOException thrown if the communication with the * <code>JobTracker</code> is lost */ public boolean waitForCompletion(boolean verbose ) throws IOException, InterruptedException, ClassNotFoundException { if (state == JobState.DEFINE) { submit();//提交 } if (verbose) { monitorAndPrintJob(); } else { // get the completion poll interval from the client. int completionPollIntervalMillis = Job.getCompletionPollInterval(cluster.getConf()); while (!isComplete()) { try { Thread.sleep(completionPollIntervalMillis); } catch (InterruptedException ie) { } } } return isSuccessful();
/** * Internal method for submitting jobs to the system. * * <p>The job submission process involves: * <ol> * <li> * Checking the input and output specifications of the job. * </li> * <li> * Computing the {@link InputSplit}s for the job. * </li> * <li> * Setup the requisite accounting information for the * {@link DistributedCache} of the job, if necessary. * </li> * <li> * Copying the job's jar and configuration to the map-reduce system * directory on the distributed file-system. * </li> * <li> * Submitting the job to the <code>JobTracker</code> and optionally * monitoring it's status. * </li> * </ol></p> * @param job the configuration to submit * @param cluster the handle to the Cluster * @throws ClassNotFoundException * @throws InterruptedException * @throws IOException */ JobStatus submitJobInternal(Job job, Cluster cluster) throws ClassNotFoundException, InterruptedException, IOException { //validate the jobs output specs checkSpecs(job); Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, job.getConfiguration()); //configure the command line options correctly on the submitting dfs Configuration conf = job.getConfiguration(); InetAddress ip = InetAddress.getLocalHost(); if (ip != null) { submitHostAddress = ip.getHostAddress(); submitHostName = ip.getHostName(); conf.set(MRJobConfig.JOB_SUBMITHOST,submitHostName); conf.set(MRJobConfig.JOB_SUBMITHOSTADDR,submitHostAddress); } JobID jobId = submitClient.getNewJobID(); job.setJobID(jobId); Path submitJobDir = new Path(jobStagingArea, jobId.toString()); JobStatus status = null; try { conf.set(MRJobConfig.USER_NAME, UserGroupInformation.getCurrentUser().getShortUserName()); conf.set("hadoop.http.filter.initializers", "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer"); conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString()); LOG.debug("Configuring job " + jobId + " with " + submitJobDir + " as the submit dir"); // get delegation token for the dir 
TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { submitJobDir }, conf); populateTokenCache(conf, job.getCredentials()); // generate a secret to authenticate shuffle transfers if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) { KeyGenerator keyGen; try { keyGen = KeyGenerator.getInstance(SHUFFLE_KEYGEN_ALGORITHM); keyGen.init(SHUFFLE_KEY_LENGTH); } catch (NoSuchAlgorithmException e) { throw new IOException("Error generating shuffle secret key", e); } SecretKey shuffleKey = keyGen.generateKey(); TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(), job.getCredentials()); } copyAndConfigureFiles(job, submitJobDir); Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir); // Create the splits for the job LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir)); int maps = writeSplits(job, submitJobDir); conf.setInt(MRJobConfig.NUM_MAPS, maps); LOG.info("number of splits:" + maps); // write "queue admins of the queue to which job is being submitted" // to job file. String queue = conf.get(MRJobConfig.QUEUE_NAME, JobConf.DEFAULT_QUEUE_NAME); AccessControlList acl = submitClient.getQueueAdmins(queue); conf.set(toFullPropertyName(queue, QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString()); // removing jobtoken referrals before copying the jobconf to HDFS // as the tasks don't need this setting, actually they may break // because of it if present as the referral will point to a // different job. TokenCache.cleanUpTokenReferral(conf); if (conf.getBoolean( MRJobConfig.JOB_TOKEN_TRACKING_IDS_ENABLED, MRJobConfig.DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED)) { // Add HDFS tracking ids ArrayList<String> trackingIds = new ArrayList<String>(); for (Token<? 
extends TokenIdentifier> t : job.getCredentials().getAllTokens()) { trackingIds.add(t.decodeIdentifier().getTrackingId()); } conf.setStrings(MRJobConfig.JOB_TOKEN_TRACKING_IDS, trackingIds.toArray(new String[trackingIds.size()])); } // Write job file to submit dir writeConf(conf, submitJobFile); // // Now, actually submit the job (using the submit name) // printTokens(jobId, job.getCredentials()); status = submitClient.submitJob( jobId, submitJobDir.toString(), job.getCredentials()); if (status != null) { return status; } else { throw new IOException("Could not launch job"); } } finally { if (status == null) { LOG.info("Cleaning up the staging area " + submitJobDir); if (jtFs != null && submitJobDir != null) jtFs.delete(submitJobDir, true); } }
- YARNRunner.java
ClientProtocol的YARNRunner类接管submitJob工作,检查运行条件并初始化运行MRAppMaster的相关信息,向ResourceManager申请Container运行MRAppMaster。
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException, InterruptedException { addHistoryToken(ts); /* Construct necessary information to start the MR AM ApplicationId ApplicationName Queue:Application将被提交到的队列 Priority:Application的优先级 User:运行MRAppMaster的用户 AMContainerSpec:运行ApplicationMaster的Container的信息 ContainerId User:运行MRAppMaster的用户 Resource:ResourceManager分配给该MRAppMaster的资源 ContainerToken:Security模式下的SecurityTokens LocalResources:MRAppMaster所在的jar包、Job的配置文件、Job程序所在的jar包、每个Split的相关信息等 ServiceData: Environment:运行MRAppMaster的ClassPath以及其他的环境便令 Commands:运行MRAppMaster的Command,如:$JAVA_HOME/bin/java MRAppMaster.class.getName() ... ApplicationACLs:MRAppMaster的访问控制列表 */ ApplicationSubmissionContext appContext = createApplicationSubmissionContext(conf, jobSubmitDir, ts); // Submit to ResourceManager try { ApplicationId applicationId = resMgrDelegate.submitApplication(appContext); // Submit to ResourceManager ApplicationReport appMaster = resMgrDelegate .getApplicationReport(applicationId); String diagnostics = (appMaster == null ? "application report is null" : appMaster.getDiagnostics()); if (appMaster == null || appMaster.getYarnApplicationState() == YarnApplicationState.FAILED || appMaster.getYarnApplicationState() == YarnApplicationState.KILLED) { throw new IOException("Failed to run job : " + diagnostics); } return clientCache.getClient(jobId).getJobStatus(jobId); } catch (YarnException e) { throw new IOException(e); } }
ResourceMgrDelegate.java
ResourceMgrDelegate负责和ResourceManager的通信,并向ResourceManager提交启动ApplicationMaster(MRAppMaster)的请求。
@Override public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { return client.submitApplication(appContext);//这里提交的任务 }
/**
 * Delegate responsible for communicating with the Resource Manager's
 * {@link ApplicationClientProtocol}.
 *
 * <p>The constructor creates the YARN client through
 * {@code YarnClient.createYarnClient()} and then calls {@code init(conf)}
 * and {@code start()} on this delegate, so the instance is already started
 * when construction returns.
 *
 * @param conf the configuration object.
 */
public ResourceMgrDelegate(YarnConfiguration conf) {
  super(ResourceMgrDelegate.class.getName());
  this.conf = conf;
  // The factory decides which client implementation is used.
  this.client = YarnClient.createYarnClient();
  init(conf);
  start();
}
YarnClientImpl.java
使用了如下方法:
public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { ApplicationId applicationId = appContext.getApplicationId(); appContext.setApplicationId(applicationId); SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class); request.setApplicationSubmissionContext(appContext); rmClient.submitApplication(request);//这里面提交任务 int pollCount = 0; while (true) { YarnApplicationState state = getApplicationReport(applicationId).getYarnApplicationState(); if (!state.equals(YarnApplicationState.NEW) && !state.equals(YarnApplicationState.NEW_SAVING)) { break; } // Notify the client through the log every 10 poll, in case the client // is blocked here too long. if (++pollCount % 10 == 0) { LOG.info("Application submission is not finished, " + "submitted application " + applicationId + " is still in " + state); } try { Thread.sleep(statePollIntervalMillis); } catch (InterruptedException ie) { } }
最后调用了
ApplicationClientProtocolPBClientImpl.java
@Override public SubmitApplicationResponse submitApplication( SubmitApplicationRequest request) throws YarnException, IOException { SubmitApplicationRequestProto requestProto = ((SubmitApplicationRequestPBImpl) request).getProto(); try { return new SubmitApplicationResponsePBImpl(proxy.submitApplication(null, requestProto)); } catch (ServiceException e) { RPCUtil.unwrapAndThrowException(e); return null; }
这里的proxy.submitApplication(null, requestProto)即通过RPC向RM提交请求。
client的处理过程基本分析完成。由于能力有限,欢迎批评指正。
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException, InterruptedException { addHistoryToken(ts); /* Construct necessary information to start the MR AM ApplicationId ApplicationName Queue:Application将被提交到的队列 Priority:Application的优先级 User:运行MRAppMaster的用户 AMContainerSpec:运行ApplicationMaster的Container的信息 ContainerId User:运行MRAppMaster的用户 Resource:ResourceManager分配给该MRAppMaster的资源 ContainerToken:Security模式下的SecurityTokens LocalResources:MRAppMaster所在的jar包、Job的配置文件、Job程序所在的jar包、每个Split的相关信息等 ServiceData: Environment:运行MRAppMaster的ClassPath以及其他的环境便令 Commands:运行MRAppMaster的Command,如:$JAVA_HOME/bin/java MRAppMaster.class.getName() ... ApplicationACLs:MRAppMaster的访问控制列表 */ ApplicationSubmissionContext appContext = createApplicationSubmissionContext(conf, jobSubmitDir, ts); // Submit to ResourceManager try { ApplicationId applicationId = resMgrDelegate.submitApplication(appContext); // Submit to ResourceManager ApplicationReport appMaster = resMgrDelegate .getApplicationReport(applicationId); String diagnostics = (appMaster == null ? "application report is null" : appMaster.getDiagnostics()); if (appMaster == null || appMaster.getYarnApplicationState() == YarnApplicationState.FAILED || appMaster.getYarnApplicationState() == YarnApplicationState.KILLED) { throw new IOException("Failed to run job : " + diagnostics); } return clientCache.getClient(jobId).getJobStatus(jobId); } catch (YarnException e) { throw new IOException(e); } }
ResourceMgrDelegate.java
ResourceMgrDelegate负责和ResourceManager的通信,并向ResourceManager提交启动ApplicationMaster(MRAppMaster)的请求。
@Override public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { return client.submitApplication(appContext);//这里提交的任务 }
/**
 * Delegate responsible for communicating with the Resource Manager's
 * {@link ApplicationClientProtocol}.
 *
 * <p>The constructor creates the YARN client through
 * {@code YarnClient.createYarnClient()} and then calls {@code init(conf)}
 * and {@code start()} on this delegate, so the instance is already started
 * when construction returns.
 *
 * @param conf the configuration object.
 */
public ResourceMgrDelegate(YarnConfiguration conf) {
  super(ResourceMgrDelegate.class.getName());
  this.conf = conf;
  // The factory decides which client implementation is used.
  this.client = YarnClient.createYarnClient();
  init(conf);
  start();
}
YarnClientImpl.java
使用了如下方法:
public ApplicationId submitApplication(ApplicationSubmissionContext appContext) throws YarnException, IOException { ApplicationId applicationId = appContext.getApplicationId(); appContext.setApplicationId(applicationId); SubmitApplicationRequest request = Records.newRecord(SubmitApplicationRequest.class); request.setApplicationSubmissionContext(appContext); rmClient.submitApplication(request);//这里面提交任务 int pollCount = 0; while (true) { YarnApplicationState state = getApplicationReport(applicationId).getYarnApplicationState(); if (!state.equals(YarnApplicationState.NEW) && !state.equals(YarnApplicationState.NEW_SAVING)) { break; } // Notify the client through the log every 10 poll, in case the client // is blocked here too long. if (++pollCount % 10 == 0) { LOG.info("Application submission is not finished, " + "submitted application " + applicationId + " is still in " + state); } try { Thread.sleep(statePollIntervalMillis); } catch (InterruptedException ie) { } }
最后调用了ApplicationClientProtocolPBClientImpl.java
@Override public SubmitApplicationResponse submitApplication( SubmitApplicationRequest request) throws YarnException, IOException { SubmitApplicationRequestProto requestProto = ((SubmitApplicationRequestPBImpl) request).getProto(); try { return new SubmitApplicationResponsePBImpl(proxy.submitApplication(null, requestProto)); } catch (ServiceException e) { RPCUtil.unwrapAndThrowException(e); return null; }
这里的proxy.submitApplication(null, requestProto)即通过RPC向RM提交请求。
0 0
- MR2 client 代码分析
- openstack client代码分析
- SCIM Bridge Client端代码分析
- appium python-client代码分析(一)
- appium python-client代码分析(二)
- appium python-client代码分析(三)
- Client代码
- UChome 代码分析讲解:uc_client模块的client.php文件
- ZooKeeper Server/Client Session 设计及代码分析
- contiki学习笔记-UDP-Client原厂代码分析
- UDP server,client 代码
- java代码client
- openvpn client 代码执行
- 【代码积累】NIO client
- 【代码积累】TCP client
- 【代码积累】UDP client
- zookeeper client分析
- Zookeeper C Client分析
- datagridview中显示行号
- SNS 背后的技术: 消息流的推拉模式选择
- AT&T汇编指令介绍
- redis 代码--hash/迭代
- 2014年4月操作系统份额:Windows XP仍然在26%以上,Linux微涨
- MR2 client 代码分析
- android 回退事件
- IOS学习笔记64--关于linker command failed with exit code错误
- JS 如何实现模板引擎
- 谈谈Memcached与Redis
- OGG-01008 Extract displays Discarding bad record (discard recs=1) when using filter or where clause
- iTues Connect
- 幸福的小情调
- Hadoop配置之中JAVA_HOME设置问题