由TaskTracker启动到Task执行
来源:互联网 发布:淘宝手办店推荐 编辑:程序博客网 时间:2024/06/01 20:54
1 TaskTracker main函数
/**
 * TaskTracker daemon entry point.
 *
 * Accepts no command-line arguments; builds a JobConf from the default
 * configuration, constructs a TaskTracker, and runs it on the current
 * thread via run(). Any Throwable during startup is logged and the
 * process exits with status -1.
 */
public static void main(String argv[]) throws Exception {
  StringUtils.startupShutdownMessage(TaskTracker.class, argv, LOG);
  // The daemon takes no arguments; anything extra is a usage error.
  if (argv.length != 0) {
    System.out.println("usage: TaskTracker");
    System.exit(-1);
  }
  try {
    JobConf conf=new JobConf();
    // enable the server to track time spent waiting on locks
    ReflectionUtils.setContentionTracing
      (conf.getBoolean("tasktracker.contention.tracking", false));
    // Note: run() is invoked directly on this thread — the TaskTracker's
    // main loop occupies the main thread for the life of the daemon.
    new TaskTracker(conf).run();
  } catch (Throwable e) {
    // Catch Throwable (not just Exception) so startup failures of any
    // kind are logged before the process dies.
    LOG.error("Can not start task tracker because "+
              StringUtils.stringifyException(e));
    System.exit(-1);
  }
}
可知主要是new TaskTracker(conf).run();这句代码:创建一个新的TaskTracker并直接调用其run()方法(注意这是在main线程上的普通方法调用,并非启动新线程)。
2 TaskTracker的构造函数
/**
 * Constructs a TaskTracker from the given configuration.
 *
 * Responsibilities visible here:
 *  - read map/reduce slot counts (default 2 each) and the JobTracker address;
 *  - start an embedded HttpServer that serves map outputs (/mapOutput) and
 *    task logs (/tasklog) to reducers and to the web UI;
 *  - publish shared objects (filesystem, conf, dir allocator, metrics) as
 *    server attributes so the JSP pages can reach them;
 *  - delegate the rest of startup to initialize().
 *
 * @param conf job/daemon configuration
 * @throws IOException if the HTTP server or initialize() fails
 */
public TaskTracker(JobConf conf) throws IOException {
  originalConf = conf;
  // Slot capacity per task type; these bound how many tasks run concurrently.
  maxCurrentMapTasks = conf.getInt(
                  "mapred.tasktracker.map.tasks.maximum", 2);
  maxCurrentReduceTasks = conf.getInt(
                  "mapred.tasktracker.reduce.tasks.maximum", 2);
  this.jobTrackAddr = JobTracker.getAddress(conf);
  // Resolve the HTTP bind address, honoring both the deprecated
  // (bindAddress/port) and current (mapred.task.tracker.http.address) keys.
  String infoAddr =
    NetUtils.getServerAddress(conf,
                              "tasktracker.http.bindAddress",
                              "tasktracker.http.port",
                              "mapred.task.tracker.http.address");
  InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
  String httpBindAddress = infoSocAddr.getHostName();
  int httpPort = infoSocAddr.getPort();
  // Port 0 means "pick a free port" — findPort flag is derived from that.
  this.server = new HttpServer("task", httpBindAddress, httpPort,
      httpPort == 0, conf);
  workerThreads = conf.getInt("tasktracker.http.threads", 40);
  this.shuffleServerMetrics = new ShuffleServerMetrics(conf);
  server.setThreads(1, workerThreads);
  // let the jsp pages get to the task tracker, config, and other relevant
  // objects
  FileSystem local = FileSystem.getLocal(conf);
  this.localDirAllocator = new LocalDirAllocator("mapred.local.dir");
  server.setAttribute("task.tracker", this);
  server.setAttribute("local.file.system", local);
  server.setAttribute("conf", conf);
  server.setAttribute("log", LOG);
  server.setAttribute("localDirAllocator", localDirAllocator);
  server.setAttribute("shuffleServerMetrics", shuffleServerMetrics);
  // /mapOutput serves intermediate map output to reducers (the shuffle);
  // /tasklog serves per-task stdout/stderr/syslog.
  server.addInternalServlet("mapOutput", "/mapOutput", MapOutputServlet.class);
  server.addInternalServlet("taskLog", "/tasklog", TaskLogServlet.class);
  server.start();
  this.httpPort = server.getPort();
  checkJettyPort(httpPort);
  // The bulk of startup (state reset, RPC server, JobTracker proxy,
  // launcher threads) happens in initialize().
  initialize();
}
该函数的主要功能有两个,一个是创建HttpServer,另外一个是initialize,initialize其实才是完成TaskTracker初始化的主要工作
3 initialize函数
/**
 * Performs (re)initialization of the TaskTracker.
 *
 * Visible steps, in order:
 *  1. determine the local hostname (slave.host.name override, else DNS);
 *  2. check local disks and wipe leftover local job files;
 *  3. reset all in-memory task/job bookkeeping tables;
 *  4. instantiate the instrumentation class via reflection (falling back
 *     to TaskTrackerMetricsInst on any failure);
 *  5. start the task-report RPC server that child task JVMs talk to;
 *  6. obtain an RPC proxy to the JobTracker (jobClient), blocking until
 *     the JobTracker is reachable;
 *  7. start the map-completion-events fetcher and the map/reduce
 *     TaskLauncher threads.
 *
 * Synchronized: also invoked on re-init after the JobTracker asks this
 * tracker to restart — NOTE(review): re-init path not visible in this
 * excerpt; confirm against full source.
 *
 * @throws IOException on disk-check, RPC, or storage-cleanup failure
 */
synchronized void initialize() throws IOException {
  // use configured nameserver & interface to get local hostname
  this.fConf = new JobConf(originalConf);
  if (fConf.get("slave.host.name") != null) {
    this.localHostname = fConf.get("slave.host.name");
  }
  if (localHostname == null) {
    this.localHostname =
    DNS.getDefaultHost
    (fConf.get("mapred.tasktracker.dns.interface","default"),
     fConf.get("mapred.tasktracker.dns.nameserver","default"));
  }

  // check local disk
  checkLocalDirs(this.fConf.getLocalDirs());
  // Remove any task-tracker subdirectories left over from a prior run.
  fConf.deleteLocalFiles(SUBDIR);

  // Clear out state tables
  this.tasks.clear();
  this.runningTasks = new LinkedHashMap<TaskAttemptID, TaskInProgress>();
  this.runningJobs = new TreeMap<JobID, RunningJob>();
  this.mapTotal = 0;
  this.reduceTotal = 0;
  this.acceptNewTasks = true;
  this.status = null;

  // Disk-space thresholds: below minSpaceStart no new tasks are accepted;
  // below minSpaceKill tasks are killed — presumably enforced elsewhere,
  // TODO confirm against the heartbeat/monitor code.
  this.minSpaceStart = this.fConf.getLong("mapred.local.dir.minspacestart", 0L);
  this.minSpaceKill = this.fConf.getLong("mapred.local.dir.minspacekill", 0L);
  // tweak the probe sample size (make it a function of numCopiers)
  probe_sample_size = this.fConf.getInt("mapred.tasktracker.events.batchsize", 500);

  // Instantiate the configured instrumentation class reflectively.
  Class<? extends TaskTrackerInstrumentation> metricsInst = getInstrumentationClass(fConf);
  try {
    java.lang.reflect.Constructor<? extends TaskTrackerInstrumentation> c =
      metricsInst.getConstructor(new Class[] {TaskTracker.class} );
    this.myInstrumentation = c.newInstance(this);
  } catch(Exception e) {
    // Reflection can throw lots of exceptions -- handle them all by
    // falling back on the default.
    LOG.error("failed to initialize taskTracker metrics", e);
    this.myInstrumentation = new TaskTrackerMetricsInst(this);
  }

  // bind address
  String address = NetUtils.getServerAddress(fConf,
                                             "mapred.task.tracker.report.bindAddress",
                                             "mapred.task.tracker.report.port",
                                             "mapred.task.tracker.report.address");
  InetSocketAddress socAddr = NetUtils.createSocketAddr(address);
  String bindAddress = socAddr.getHostName();
  int tmpPort = socAddr.getPort();

  this.jvmManager = new JvmManager(this);

  // Set service-level authorization security policy
  if (this.fConf.getBoolean(
        ServiceAuthorizationManager.SERVICE_AUTHORIZATION_CONFIG, false)) {
    PolicyProvider policyProvider =
      (PolicyProvider)(ReflectionUtils.newInstance(
          this.fConf.getClass(PolicyProvider.POLICY_PROVIDER_CONFIG,
              MapReducePolicyProvider.class, PolicyProvider.class),
          this.fConf));
    SecurityUtil.setPolicy(new ConfiguredPolicy(this.fConf, policyProvider));
  }

  // RPC initialization
  int max = maxCurrentMapTasks > maxCurrentReduceTasks ?
                       maxCurrentMapTasks : maxCurrentReduceTasks;
  // set the num handlers to max*2 since canCommit may wait for the duration
  // of a heartbeat RPC
  this.taskReportServer =
    RPC.getServer(this, bindAddress, tmpPort, 2 * max, false, this.fConf);
  this.taskReportServer.start();

  // get the assigned address (important when tmpPort was 0 / ephemeral)
  this.taskReportAddress = taskReportServer.getListenerAddress();
  this.fConf.set("mapred.task.tracker.report.address",
      taskReportAddress.getHostName() + ":" + taskReportAddress.getPort());
  LOG.info("TaskTracker up at: " + this.taskReportAddress);

  this.taskTrackerName = "tracker_" + localHostname + ":" + taskReportAddress;
  LOG.info("Starting tracker " + taskTrackerName);

  // Clear out temporary files that might be lying around
  DistributedCache.purgeCache(this.fConf);
  cleanupStorage();

  // Blocking RPC proxy to the JobTracker; this is the channel used for
  // heartbeats and task assignment.
  this.jobClient = (InterTrackerProtocol)
    RPC.waitForProxy(InterTrackerProtocol.class,
                     InterTrackerProtocol.versionID,
                     jobTrackAddr, this.fConf);
  this.justInited = true;
  this.running = true;

  // start the thread that will fetch map task completion events
  this.mapEventsFetcher = new MapEventsFetcherThread();
  mapEventsFetcher.setDaemon(true);
  mapEventsFetcher.setName(
      "Map-events fetcher for all reduce tasks " + "on " + taskTrackerName);
  mapEventsFetcher.start();

  initializeMemoryManagement();

  this.indexCache = new IndexCache(this.fConf);

  // One launcher thread per task type; each owns its own slot pool.
  mapLauncher = new TaskLauncher(maxCurrentMapTasks);
  reduceLauncher = new TaskLauncher(maxCurrentReduceTasks);
  mapLauncher.start();
  reduceLauncher.start();
}
其主要功能是初始化一般属性,创建一个taskReportServer并通过rpc获得一个jobClient引用,创建jobClient用到的地址在构造函数中已经得到初始化 this.jobTrackAddr = JobTracker.getAddress(conf);。还有创建一个mapLauncher和reduceLauncher并启动。
4 mapLauncher.run()
/**
 * TaskLauncher main loop: repeatedly dequeues a TaskInProgress, waits for
 * a free slot, and launches it via startNewTask.
 *
 * Two separate wait/notify monitors are used, in a fixed order:
 *  1. tasksToLaunch — producer/consumer queue of pending TIPs;
 *  2. numFreeSlots  — slot counter; the thread blocks here until a slot
 *     frees up, then decrements it.
 * A task that was killed while queued (state no longer UNASSIGNED /
 * FAILED_UNCLEAN / KILLED_UNCLEAN) gives its slot back and is skipped.
 * InterruptedException terminates the loop; any other Throwable is logged
 * and the loop continues, so one bad launch cannot kill the launcher.
 */
public void run() {
  while (!Thread.interrupted()) {
    try {
      TaskInProgress tip;
      // Phase 1: wait for a task to appear in the launch queue.
      synchronized (tasksToLaunch) {
        while (tasksToLaunch.isEmpty()) {
          tasksToLaunch.wait();
        }
        // get the TIP
        tip = tasksToLaunch.remove(0);
        LOG.info("Trying to launch : " + tip.getTask().getTaskID());
      }
      // Phase 2: wait for a slot to run, then claim it.
      synchronized (numFreeSlots) {
        while (numFreeSlots.get() == 0) {
          numFreeSlots.wait();
        }
        LOG.info("In TaskLauncher, current free slots : " + numFreeSlots.get()+
                 " and trying to launch "+tip.getTask().getTaskID());
        numFreeSlots.set(numFreeSlots.get() - 1);
        assert (numFreeSlots.get() >= 0);
      }
      // Phase 3: verify the task is still launchable before spending the slot.
      synchronized (tip) {
        // to make sure that there is no kill task action for this
        if (tip.getRunState() != TaskStatus.State.UNASSIGNED &&
            tip.getRunState() != TaskStatus.State.FAILED_UNCLEAN &&
            tip.getRunState() != TaskStatus.State.KILLED_UNCLEAN) {
          // got killed externally while still in the launcher queue;
          // return the slot we just took and move on.
          addFreeSlot();
          continue;
        }
        tip.slotTaken = true;
      }
      // got a free slot. launch the task
      startNewTask(tip);
    } catch (InterruptedException e) {
      return; // ALL DONE
    } catch (Throwable th) {
      LOG.error("TaskLauncher error " +
                StringUtils.stringifyException(th));
    }
  }
}
它先等待任务的到来,然后等待空闲的slot,等两者兼备时选择合适状态的任务执行startNewTask(tip);
5 startNewTask
/**
 * Kicks off a freshly assigned task by localizing its job on this node.
 *
 * On any failure during localization the task's diagnostics are updated
 * and the task is killed and cleaned up; the failure is swallowed for
 * Exceptions (the task simply fails) but rethrown for Errors, since an
 * Error indicates the tracker itself is unhealthy.
 *
 * @param tip the task to launch
 */
private void startNewTask(TaskInProgress tip) {
  try {
    // localizeJob copies job.xml / job.jar locally and ultimately calls
    // launchTaskForJob, so the actual launch happens inside this call.
    localizeJob(tip);
  } catch (Throwable e) {
    String msg = ("Error initializing " + tip.getTask().getTaskID() +
                  ":\n" + StringUtils.stringifyException(e));
    LOG.warn(msg);
    tip.reportDiagnosticInfo(msg);
    try {
      tip.kill(true);
      tip.cleanup(true);
    } catch (IOException ie2) {
      LOG.info("Error cleaning up " + tip.getTask().getTaskID() + ":\n" +
               StringUtils.stringifyException(ie2));
    }
    // Careful!
    // This might not be an 'Exception' - don't handle 'Error' here!
    if (e instanceof Error) {
      throw ((Error) e);
    }
  }
}
直接进入localizeJob
6 localizeJob
/**
 * Localizes a job on this TaskTracker (once per job), then launches the task.
 *
 * Under the per-job lock (rjob), the first task of a job to arrive:
 *  - copies job.xml from the shared FS (systemFS) to a local dir, wiping
 *    any partially-localized leftovers first;
 *  - creates the job-scoped 'work' scratch directory;
 *  - copies job.jar locally (reserving 5x its size for the unjarred
 *    contents), rewrites the local job.xml to point at the local jar,
 *    and unjars it;
 *  - marks the RunningJob as localized so subsequent tasks skip all this.
 * Finally (for every task, localized or not) calls launchTaskForJob with
 * a private copy of the job's JobConf.
 *
 * @param tip the task whose job must be localized
 * @throws IOException if copying or directory creation fails
 */
private void localizeJob(TaskInProgress tip) throws IOException {
  Path localJarFile = null;
  Task t = tip.getTask();
  JobID jobId = t.getJobID();
  Path jobFile = new Path(t.getJobFile());
  // Get sizes of JobFile and JarFile
  // sizes are -1 if they are not present.
  FileStatus status = null;
  long jobFileSize = -1;
  try {
    status = systemFS.getFileStatus(jobFile);
    jobFileSize = status.getLen();
  } catch(FileNotFoundException fe) {
    jobFileSize = -1;
  }
  // Pick a local dir with enough room for job.xml.
  Path localJobFile = lDirAlloc.getLocalPathForWrite(
                        getLocalJobDir(jobId.toString())
                        + Path.SEPARATOR + "job.xml",
                        jobFileSize, fConf);
  RunningJob rjob = addTaskToJob(jobId, tip);
  synchronized (rjob) {
    if (!rjob.localized) {
      FileSystem localFs = FileSystem.getLocal(fConf);
      // this will happen on a partial execution of localizeJob.
      // Sometimes the job.xml gets copied but copying job.jar
      // might throw out an exception
      // we should clean up and then try again
      Path jobDir = localJobFile.getParent();
      if (localFs.exists(jobDir)){
        localFs.delete(jobDir, true);
        boolean b = localFs.mkdirs(jobDir);
        if (!b)
          throw new IOException("Not able to create job directory "
                                + jobDir.toString());
      }
      systemFS.copyToLocalFile(jobFile, localJobFile);
      JobConf localJobConf = new JobConf(localJobFile);

      // create the 'work' directory
      // job-specific shared directory for use as scratch space
      Path workDir = lDirAlloc.getLocalPathForWrite(
                       (getLocalJobDir(jobId.toString())
                        + Path.SEPARATOR + "work"), fConf);
      if (!localFs.mkdirs(workDir)) {
        throw new IOException("Mkdirs failed to create "
                              + workDir.toString());
      }
      System.setProperty("job.local.dir", workDir.toString());
      localJobConf.set("job.local.dir", workDir.toString());

      // copy Jar file to the local FS and unjar it.
      String jarFile = localJobConf.getJar();
      long jarFileSize = -1;
      if (jarFile != null) {
        Path jarFilePath = new Path(jarFile);
        try {
          status = systemFS.getFileStatus(jarFilePath);
          jarFileSize = status.getLen();
        } catch(FileNotFoundException fe) {
          jarFileSize = -1;
        }
        // Here we check for and we check five times the size of jarFileSize
        // to accommodate for unjarring the jar file in work directory
        localJarFile = new Path(lDirAlloc.getLocalPathForWrite(
                                  getLocalJobDir(jobId.toString())
                                  + Path.SEPARATOR + "jars",
                                  5 * jarFileSize, fConf), "job.jar");
        if (!localFs.mkdirs(localJarFile.getParent())) {
          throw new IOException("Mkdirs failed to create jars directory ");
        }
        systemFS.copyToLocalFile(jarFilePath, localJarFile);
        localJobConf.setJar(localJarFile.toString());
        // Persist the updated conf (now pointing at the local jar) back to
        // the local job.xml so child JVMs see the local path.
        OutputStream out = localFs.create(localJobFile);
        try {
          localJobConf.writeXml(out);
        } finally {
          out.close();
        }
        // also unjar the job.jar files
        RunJar.unJar(new File(localJarFile.toString()),
                     new File(localJarFile.getParent().toString()));
      }
      rjob.keepJobFiles = ((localJobConf.getKeepTaskFilesPattern() != null) ||
                           localJobConf.getKeepFailedTaskFiles());
      rjob.localized = true;
      rjob.jobConf = localJobConf;
    }
  }
  // Every task (first or not) proceeds to launch with its own JobConf copy.
  launchTaskForJob(tip, new JobConf(rjob.jobConf));
}
前面的主要功能是将作业本地化, systemFS.copyToLocalFile(jobFile, localJobFile);systemFS.copyToLocalFile(jarFilePath, localJarFile);这两句分别完成作业配置文件(job.xml)和程序包(job.jar)从共享文件系统到本地的拷贝。
最后launchTaskForJob(tip, new JobConf(rjob.jobConf));
7 launchTaskForJob
/**
 * Binds the (localized) job configuration to the task and launches it.
 * Both steps happen under the TIP's monitor so a concurrent kill cannot
 * interleave between them.
 *
 * @param tip     the task to launch
 * @param jobConf the job configuration to run it with
 * @throws IOException propagated from launchTask
 */
private void launchTaskForJob(TaskInProgress tip, JobConf jobConf)
    throws IOException{
  synchronized (tip) {
    tip.setJobConf(jobConf);
    tip.launchTask();
  }
}
8 tip.launchTask
/**
 * Launches this task if it is still in a launchable state.
 *
 * Launchable states are UNASSIGNED (normal launch) and FAILED_UNCLEAN /
 * KILLED_UNCLEAN (a cleanup attempt for a task that died uncleanly).
 * Localizes the task's files, flips UNASSIGNED to RUNNING, then starts
 * the TaskRunner thread created by task.createRunner(). Any other state
 * means the task was killed/finished while queued, so launching is skipped.
 *
 * @throws IOException if task localization fails
 */
public synchronized void launchTask() throws IOException {
  if (this.taskStatus.getRunState() == TaskStatus.State.UNASSIGNED ||
      this.taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN ||
      this.taskStatus.getRunState() == TaskStatus.State.KILLED_UNCLEAN) {
    localizeTask(task);
    // Only a normal (not unclean-cleanup) launch transitions to RUNNING.
    if (this.taskStatus.getRunState() == TaskStatus.State.UNASSIGNED) {
      this.taskStatus.setRunState(TaskStatus.State.RUNNING);
    }
    // The runner is a separate thread; the child JVM is spawned from there.
    this.runner = task.createRunner(TaskTracker.this, this);
    this.runner.start();
    this.taskStatus.setStartTime(System.currentTimeMillis());
  } else {
    LOG.info("Not launching task: " + task.getTaskID() +
             " since it's state is " + this.taskStatus.getRunState());
  }
}
9 TaskRunner.run()
/**
 * TaskRunner main body: prepares the task's execution environment, asks
 * the JvmManager to run the child JVM, and waits for it to finish.
 *
 * Major phases visible in this method:
 *  1. Localize DistributedCache archives/files and rewrite the task's
 *     job.xml with their local paths;
 *  2. Build the child classpath (parent classpath + job jar/lib/classes +
 *     cache classpath entries + work dir);
 *  3. Assemble the child JVM command line: java binary, user java opts
 *     (with @taskid@ substituted), java.library.path, tmpdir, log4j
 *     properties, optional profiler args, then the Child main class and
 *     its arguments (umbilical host/port, task id);
 *  4. Optionally prefix a ulimit setup command, wire stdout/stderr log
 *     files and LD_LIBRARY_PATH, and hand everything to
 *     jvmManager.launchJvm;
 *  5. Block on 'lock' until the JVM-exit callback sets 'done', then
 *     translate a nonzero exit code into an IOException;
 *  6. In 'finally', release DistributedCache references and report the
 *     task finished.
 */
public final void run() {
  try {
    // before preparing the job localize
    // all the archives
    TaskAttemptID taskid = t.getTaskID();
    LocalDirAllocator lDirAlloc = new LocalDirAllocator("mapred.local.dir");
    File jobCacheDir = null;
    if (conf.getJar() != null) {
      jobCacheDir = new File(
                      new Path(conf.getJar()).getParent().toString());
    }
    File workDir = new File(lDirAlloc.getLocalPathToRead(
                              TaskTracker.getLocalTaskDir(
                                t.getJobID().toString(),
                                t.getTaskID().toString(),
                                t.isTaskCleanupTask())
                              + Path.SEPARATOR + MRConstants.WORKDIR,
                              conf). toString());

    URI[] archives = DistributedCache.getCacheArchives(conf);
    URI[] files = DistributedCache.getCacheFiles(conf);
    FileStatus fileStatus;
    FileSystem fileSystem;
    Path localPath;
    String baseDir;

    // Phase 1: pull each cache archive/file to a local dir sized for it,
    // then record the local paths back into the conf.
    if ((archives != null) || (files != null)) {
      if (archives != null) {
        String[] archivesTimestamps =
          DistributedCache.getArchiveTimestamps(conf);
        Path[] p = new Path[archives.length];
        for (int i = 0; i < archives.length;i++){
          fileSystem = FileSystem.get(archives[i], conf);
          fileStatus = fileSystem.getFileStatus(
                         new Path(archives[i].getPath()));
          String cacheId = DistributedCache.makeRelative(archives[i],conf);
          String cachePath = TaskTracker.getCacheSubdir()
                             + Path.SEPARATOR + cacheId;
          localPath = lDirAlloc.getLocalPathForWrite(cachePath,
                        fileStatus.getLen(), conf);
          baseDir = localPath.toString().replace(cacheId, "");
          // 'true' => this entry is an archive (will be unarchived).
          p[i] = DistributedCache.getLocalCache(archives[i], conf,
                   new Path(baseDir), fileStatus, true,
                   Long.parseLong(archivesTimestamps[i]),
                   new Path(workDir.getAbsolutePath()), false);
        }
        DistributedCache.setLocalArchives(conf, stringifyPathArray(p));
      }
      if ((files != null)) {
        String[] fileTimestamps = DistributedCache.getFileTimestamps(conf);
        Path[] p = new Path[files.length];
        for (int i = 0; i < files.length;i++){
          fileSystem = FileSystem.get(files[i], conf);
          fileStatus = fileSystem.getFileStatus(
                         new Path(files[i].getPath()));
          String cacheId = DistributedCache.makeRelative(files[i], conf);
          String cachePath = TaskTracker.getCacheSubdir()
                             + Path.SEPARATOR + cacheId;
          localPath = lDirAlloc.getLocalPathForWrite(cachePath,
                        fileStatus.getLen(), conf);
          baseDir = localPath.toString().replace(cacheId, "");
          // 'false' => plain file, not unarchived.
          p[i] = DistributedCache.getLocalCache(files[i], conf,
                   new Path(baseDir), fileStatus, false,
                   Long.parseLong(fileTimestamps[i]),
                   new Path(workDir.getAbsolutePath()), false);
        }
        DistributedCache.setLocalFiles(conf, stringifyPathArray(p));
      }
      // Rewrite the task's job.xml so the child sees the localized paths.
      Path localTaskFile = new Path(t.getJobFile());
      FileSystem localFs = FileSystem.getLocal(conf);
      localFs.delete(localTaskFile, true);
      OutputStream out = localFs.create(localTaskFile);
      try {
        conf.writeXml(out);
      } finally {
        out.close();
      }
    }

    // Subclass hook (map/reduce specific); false aborts the launch.
    if (!prepare()) {
      return;
    }

    // Phase 2: build the child classpath.
    String sep = System.getProperty("path.separator");
    StringBuffer classPath = new StringBuffer();
    // start with same classpath as parent process
    classPath.append(System.getProperty("java.class.path"));
    classPath.append(sep);
    if (!workDir.mkdirs()) {
      if (!workDir.isDirectory()) {
        LOG.fatal("Mkdirs failed to create " + workDir.toString());
      }
    }
    String jar = conf.getJar();
    if (jar != null) {
      // the job has a jar: add its lib/, classes/, and the jar dir itself
      File[] libs = new File(jobCacheDir, "lib").listFiles();
      if (libs != null) {
        for (int i = 0; i < libs.length; i++) {
          classPath.append(sep); // add libs from jar to classpath
          classPath.append(libs[i]);
        }
      }
      classPath.append(sep);
      classPath.append(new File(jobCacheDir, "classes"));
      classPath.append(sep);
      classPath.append(jobCacheDir);
    }

    // include the user specified classpath
    // archive paths
    Path[] archiveClasspaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClasspaths != null && archives != null) {
      Path[] localArchives = DistributedCache.getLocalCacheArchives(conf);
      if (localArchives != null){
        for (int i=0;i<archives.length;i++){
          for(int j=0;j<archiveClasspaths.length;j++){
            if (archives[i].getPath().equals(
                  archiveClasspaths[j].toString())){
              classPath.append(sep);
              classPath.append(localArchives[i].toString());
            }
          }
        }
      }
    }
    // file paths
    Path[] fileClasspaths = DistributedCache.getFileClassPaths(conf);
    if (fileClasspaths!=null && files != null) {
      Path[] localFiles = DistributedCache.getLocalCacheFiles(conf);
      if (localFiles != null) {
        for (int i = 0; i < files.length; i++) {
          for (int j = 0; j < fileClasspaths.length; j++) {
            if (files[i].getPath().equals(
                  fileClasspaths[j].toString())) {
              classPath.append(sep);
              classPath.append(localFiles[i].toString());
            }
          }
        }
      }
    }
    classPath.append(sep);
    classPath.append(workDir);

    // Phase 3: Build exec child jmv args.
    Vector<String> vargs = new Vector<String>(8);
    File jvm = // use same jvm as parent
      new File(new File(System.getProperty("java.home"), "bin"), "java");
    vargs.add(jvm.toString());

    // Add child (task) java-vm options.
    //
    // The following symbols if present in mapred.child.java.opts value are
    // replaced:
    // + @taskid@ is interpolated with value of TaskID.
    // Other occurrences of @ will not be altered.
    //
    // Example with multiple arguments and substitutions, showing
    // jvm GC logging, and start of a passwordless JVM JMX agent so can
    // connect with jconsole and the likes to watch child memory, threads
    // and get thread dumps.
    //
    //  <property>
    //    <name>mapred.child.java.opts</name>
    //    <value>-verbose:gc -Xloggc:/tmp/@taskid@.gc \
    //           -Dcom.sun.management.jmxremote.authenticate=false \
    //           -Dcom.sun.management.jmxremote.ssl=false \
    //    </value>
    //  </property>
    //
    String javaOpts = conf.get("mapred.child.java.opts", "-Xmx200m");
    javaOpts = javaOpts.replace("@taskid@", taskid.toString());
    String [] javaOptsSplit = javaOpts.split(" ");

    // Add java.library.path; necessary for loading native libraries.
    //
    // 1. To support native-hadoop library i.e. libhadoop.so, we add the
    //    parent processes' java.library.path to the child.
    // 2. We also add the 'cwd' of the task to it's java.library.path to help
    //    users distribute native libraries via the DistributedCache.
    // 3. The user can also specify extra paths to be added to the
    //    java.library.path via mapred.child.java.opts.
    //
    String libraryPath = System.getProperty("java.library.path");
    if (libraryPath == null) {
      libraryPath = workDir.getAbsolutePath();
    } else {
      libraryPath += sep + workDir;
    }
    boolean hasUserLDPath = false;
    for(int i=0; i<javaOptsSplit.length ;i++) {
      if(javaOptsSplit[i].startsWith("-Djava.library.path=")) {
        // User supplied their own -Djava.library.path; append ours to it.
        javaOptsSplit[i] += sep + libraryPath;
        hasUserLDPath = true;
        break;
      }
    }
    if(!hasUserLDPath) {
      vargs.add("-Djava.library.path=" + libraryPath);
    }
    for (int i = 0; i < javaOptsSplit.length; i++) {
      vargs.add(javaOptsSplit[i]);
    }

    // add java.io.tmpdir given by mapred.child.tmp
    String tmp = conf.get("mapred.child.tmp", "./tmp");
    Path tmpDir = new Path(tmp);

    // if temp directory path is not absolute
    // prepend it with workDir.
    if (!tmpDir.isAbsolute()) {
      tmpDir = new Path(workDir.toString(), tmp);
    }
    FileSystem localFs = FileSystem.getLocal(conf);
    if (!localFs.mkdirs(tmpDir) && !localFs.getFileStatus(tmpDir).isDir()) {
      throw new IOException("Mkdirs failed to create " + tmpDir.toString());
    }
    vargs.add("-Djava.io.tmpdir=" + tmpDir.toString());

    // Add classpath.
    vargs.add("-classpath");
    vargs.add(classPath.toString());

    // Setup the log4j prop
    long logSize = TaskLog.getTaskLogLength(conf);
    vargs.add("-Dhadoop.log.dir=" +
        new File(System.getProperty("hadoop.log.dir")
        ).getAbsolutePath());
    vargs.add("-Dhadoop.root.logger=INFO,TLA");
    vargs.add("-Dhadoop.tasklog.taskid=" + taskid);
    vargs.add("-Dhadoop.tasklog.totalLogFileSize=" + logSize);

    if (conf.getProfileEnabled()) {
      if (conf.getProfileTaskRange(t.isMapTask()
                                   ).isIncluded(t.getPartition())) {
        File prof = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.PROFILE);
        vargs.add(String.format(conf.getProfileParams(), prof.toString()));
      }
    }

    // Add main class and its arguments
    vargs.add(Child.class.getName());  // main of Child
    // pass umbilical address — the RPC endpoint the child reports back to.
    InetSocketAddress address = tracker.getTaskTrackerReportAddress();
    vargs.add(address.getAddress().getHostAddress());
    vargs.add(Integer.toString(address.getPort()));
    vargs.add(taskid.toString());      // pass task identifier

    String pidFile = lDirAlloc.getLocalPathForWrite(
          (TaskTracker.getPidFile(t.getJobID().toString(),
             taskid.toString(), t.isTaskCleanupTask())),
          this.conf).toString();
    t.setPidFile(pidFile);
    tracker.addToMemoryManager(t.getTaskID(), t.isMapTask(), conf, pidFile);

    // Phase 4: set memory limit using ulimit if feasible and necessary ...
    String[] ulimitCmd = Shell.getUlimitMemoryCommand(conf);
    List<String> setup = null;
    if (ulimitCmd != null) {
      setup = new ArrayList<String>();
      for (String arg : ulimitCmd) {
        setup.add(arg);
      }
    }

    // Set up the redirection of the task's stdout and stderr streams
    File stdout = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, TaskLog.LogName.STDERR);
    stdout.getParentFile().mkdirs();
    tracker.getTaskTrackerInstrumentation().reportTaskLaunch(taskid, stdout, stderr);

    Map<String, String> env = new HashMap<String, String>();
    StringBuffer ldLibraryPath = new StringBuffer();
    ldLibraryPath.append(workDir.toString());
    String oldLdLibraryPath = null;
    oldLdLibraryPath = System.getenv("LD_LIBRARY_PATH");
    if (oldLdLibraryPath != null) {
      ldLibraryPath.append(sep);
      ldLibraryPath.append(oldLdLibraryPath);
    }
    env.put("LD_LIBRARY_PATH", ldLibraryPath.toString());
    // Hand off to the JvmManager, which may reuse an idle JVM or spawn one.
    jvmManager.launchJvm(this,
        jvmManager.constructJvmEnv(setup,vargs,stdout,stderr,logSize,
            workDir, env, pidFile, conf));

    // Phase 5: block until the JVM-exit callback signals completion.
    synchronized (lock) {
      while (!done) {
        lock.wait();
      }
    }
    tracker.getTaskTrackerInstrumentation().reportTaskEnd(t.getTaskID());
    if (exitCodeSet) {
      if (!killed && exitCode != 0) {
        // 65 appears to be treated as a "failed ping" marker — see
        // taskFailedPing; NOTE(review): confirm meaning in Child/umbilical.
        if (exitCode == 65) {
          tracker.getTaskTrackerInstrumentation().taskFailedPing(t.getTaskID());
        }
        throw new IOException("Task process exit with nonzero status of " +
                              exitCode + ".");
      }
    }
  } catch (FSError e) {
    LOG.fatal("FSError", e);
    try {
      tracker.fsError(t.getTaskID(), e.getMessage());
    } catch (IOException ie) {
      LOG.fatal(t.getTaskID()+" reporting FSError", ie);
    }
  } catch (Throwable throwable) {
    LOG.warn(t.getTaskID()+" Child Error", throwable);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    throwable.printStackTrace(new PrintStream(baos));
    try {
      tracker.reportDiagnosticInfo(t.getTaskID(), baos.toString());
    } catch (IOException e) {
      LOG.warn(t.getTaskID()+" Reporting Diagnostics", e);
    }
  } finally {
    // Phase 6: always release cache refs and report the task finished,
    // regardless of how the launch ended.
    try{
      URI[] archives = DistributedCache.getCacheArchives(conf);
      URI[] files = DistributedCache.getCacheFiles(conf);
      if (archives != null){
        for (int i = 0; i < archives.length; i++){
          DistributedCache.releaseCache(archives[i], conf);
        }
      }
      if (files != null){
        for(int i = 0; i < files.length; i++){
          DistributedCache.releaseCache(files[i], conf);
        }
      }
    }catch(IOException ie){
      LOG.warn("Error releasing caches : Cache files might not have been cleaned up");
    }
    tip.reportTaskFinished();
  }
}
10 jvmManager.launchJvm
/**
 * Routes a JVM launch request to the per-type manager: map tasks go to
 * mapJvmManager, everything else to reduceJvmManager. The chosen manager's
 * reapJvm decides whether to reuse an idle JVM or spawn a new one.
 *
 * @param t   the runner requesting a JVM
 * @param env environment/command description for the child JVM
 */
public void launchJvm(TaskRunner t, JvmEnv env) {
  (t.getTask().isMapTask() ? mapJvmManager : reduceJvmManager).reapJvm(t, env);
}
11 mapJvmManager.reapJvm()
/**
 * Finds or creates a JVM to run the given task.
 *
 * Outcomes, in priority order:
 *  (1) below maxJvms: spawn a new JVM;
 *  (2) at capacity: reuse an idle JVM belonging to the same job
 *      (setRunningTaskForJvm and return);
 *  (3) at capacity with no same-job idle JVM: kill a victim — an
 *      exhausted same-job JVM (ranAll) or an idle other-job JVM — and
 *      spawn a replacement.
 * A task already killed in-flight returns immediately. If none of the
 * cases apply the method throws RuntimeException, which the code treats
 * as an impossible state.
 *
 * Synchronized: guards jvmIdToRunner and the spawn/kill decision.
 */
private synchronized void reapJvm(
    TaskRunner t, JvmEnv env) {
  if (t.getTaskInProgress().wasKilled()) {
    // the task was killed in-flight
    // no need to do the rest of the operations
    return;
  }
  boolean spawnNewJvm = false;
  JobID jobId = t.getTask().getJobID();

  // Check whether there is a free slot to start a new JVM.
  //,or, Kill a (idle) JVM and launch a new one
  // When this method is called, we *must*
  //  (1) spawn a new JVM (if we are below the max)
  //  (2) find an idle JVM (that belongs to the same job), or,
  //  (3) kill an idle JVM (from a different job)
  //  (the order of return is in the order above)
  int numJvmsSpawned = jvmIdToRunner.size();
  JvmRunner runnerToKill = null;
  if (numJvmsSpawned >= maxJvms) {
    // go through the list of JVMs for all jobs.
    Iterator<Map.Entry<JVMId, JvmRunner>> jvmIter =
      jvmIdToRunner.entrySet().iterator();
    while (jvmIter.hasNext()) {
      JvmRunner jvmRunner = jvmIter.next().getValue();
      JobID jId = jvmRunner.jvmId.getJobId();
      // look for a free JVM for this job; if one exists then just break
      if (jId.equals(jobId) && !jvmRunner.isBusy() && !jvmRunner.ranAll()){
        setRunningTaskForJvm(jvmRunner.jvmId, t); // reserve the JVM
        LOG.info("No new JVM spawned for jobId/taskid: " +
                 jobId+"/"+t.getTask().getTaskID() +
                 ". Attempting to reuse: " + jvmRunner.jvmId);
        return;
      }
      // Cases when a JVM is killed:
      //  (1) the JVM under consideration belongs to the same job
      //      (passed in the argument). In this case, kill only when
      //      the JVM ran all the tasks it was scheduled to run (in terms
      //      of count).
      //  (2) the JVM under consideration belongs to a different job and is
      //      currently not busy
      // But in both the above cases, we see if we can assign the current
      // task to an idle JVM (hence we continue the loop even on a match)
      if ((jId.equals(jobId) && jvmRunner.ranAll()) ||
          (!jId.equals(jobId) && !jvmRunner.isBusy())) {
        runnerToKill = jvmRunner;
        spawnNewJvm = true;
      }
    }
  } else {
    spawnNewJvm = true;
  }

  if (spawnNewJvm) {
    if (runnerToKill != null) {
      LOG.info("Killing JVM: " + runnerToKill.jvmId);
      runnerToKill.kill();
    }
    spawnNewJvm(jobId, env, t);
    return;
  }
  // *MUST* never reach this
  throw new RuntimeException("Inconsistent state!!! " +
      "JVM Manager reached an unstable state " +
      "while reaping a JVM for task: " + t.getTask().getTaskID()+
      " " + getDetails());
}
12 spawnNewJvm(jobId, env, t);
/**
 * Spawns a brand-new child JVM for the given job and assigns the task to it.
 *
 * The JvmRunner is a daemon thread; the task is registered against the new
 * jvmId before the thread starts so the exit callback can find it.
 *
 * @param jobId job the JVM will serve
 * @param env   environment/command for the child process
 * @param t     the task to run first in the new JVM
 */
private void spawnNewJvm(JobID jobId, JvmEnv env,
    TaskRunner t) {
  JvmRunner jvmRunner = new JvmRunner(env,jobId);
  jvmIdToRunner.put(jvmRunner.jvmId, jvmRunner);
  // spawn the JVM in a new thread. Note that there will be very little
  // extra overhead of launching the new thread for a new JVM since
  // most of the cost is involved in launching the process. Moreover,
  // since we are going to be using the JVM for running many tasks,
  // the thread launch cost becomes trivial when amortized over all
  // tasks. Doing it this way also keeps code simple.
  jvmRunner.setDaemon(true);
  jvmRunner.setName("JVM Runner " + jvmRunner.jvmId + " spawned.");
  setRunningTaskForJvm(jvmRunner.jvmId, t);
  LOG.info(jvmRunner.getName());
  jvmRunner.start();
}
13 jvmRunner.run()
/** Thread entry point: delegates to runChild with the captured env. */
public void run() {
  runChild(env);
}

/**
 * Launches the child task JVM as an OS process and handles its exit.
 *
 * The command is wrapped by TaskLog.captureOutAndError (stdout/stderr
 * redirection, log-size capping, pid file) and executed synchronously via
 * ShellCommandExecutor — this call blocks until the child process exits.
 * IOException from execution is deliberately ignored because the child's
 * error output is already redirected to the task logs. The finally block
 * records the exit code via updateOnJvmExit and, when JVM reuse is enabled
 * (num tasks per JVM != 1), deletes the shared work dir for the last task.
 *
 * @param env command, environment, workdir and log settings for the child
 */
public void runChild(JvmEnv env) {
  try {
    // Append the JVM id so the child can identify itself to the tracker.
    env.vargs.add(Integer.toString(jvmId.getId()));
    List<String> wrappedCommand =
      TaskLog.captureOutAndError(env.setup, env.vargs, env.stdout,
          env.stderr, env.logSize, env.pidFile);
    shexec = new ShellCommandExecutor(wrappedCommand.toArray(new String[0]),
        env.workDir, env.env);
    shexec.execute();
  } catch (IOException ioe) {
    // do nothing
    // error and output are appropriately redirected
  } finally { // handle the exit code
    // shexec == null means we failed before even building the executor.
    if (shexec == null) {
      return;
    }
    int exitCode = shexec.getExitCode();
    updateOnJvmExit(jvmId, exitCode, killed);
    LOG.info("JVM : " + jvmId +" exited. Number of tasks it ran: " +
        numTasksRan);
    try {
      // In case of jvm-reuse,
      // the task jvm cleans up the common workdir for every
      // task at the beginning of each task in the task JVM.
      // For the last task, we do it here.
      if (env.conf.getNumTasksToExecutePerJvm() != 1) {
        FileUtil.fullyDelete(env.workDir);
      }
    } catch (IOException ie){}
  }
}
14 shexec.execute();
/**
 * Public entry point for running the shell command; simply delegates to
 * the (possibly rate-limited) run().
 *
 * @throws IOException if the command fails or exits nonzero
 */
public void execute() throws IOException {
  this.run();
}
/**
 * Runs the command, but no more often than once per 'interval' since the
 * last completed run — a second call inside the window is silently a no-op.
 *
 * @throws IOException propagated from runCommand
 */
protected void run() throws IOException {
  // Throttle: skip execution if the interval since lastTime hasn't elapsed.
  if (lastTime + interval > System.currentTimeMillis())
    return;
  exitCode = 0; // reset for next run
  runCommand();
}
/**
 * Launches the command via ProcessBuilder and supervises it to completion.
 *
 * A dedicated thread drains stderr into errMsg (so the child cannot block
 * on a full pipe buffer) while this thread lets the subclass parse stdout
 * (parseExecResult), drains any remaining stdout, and waits for the
 * process. A nonzero exit code is turned into an ExitCodeException that
 * carries the accumulated stderr text. The finally block closes both
 * streams, interrupts the stderr thread if the run did not complete,
 * destroys the process, and stamps lastTime for run()'s throttling.
 *
 * @throws IOException on I/O failure, interruption, or nonzero exit
 */
private void runCommand() throws IOException {
  ProcessBuilder builder = new ProcessBuilder(getExecString());
  boolean completed = false;

  if (environment != null) {
    builder.environment().putAll(this.environment);
  }
  if (dir != null) {
    builder.directory(this.dir);
  }

  process = builder.start();
  final BufferedReader errReader =
          new BufferedReader(new InputStreamReader(process
                                                   .getErrorStream()));
  BufferedReader inReader =
          new BufferedReader(new InputStreamReader(process
                                                   .getInputStream()));
  final StringBuffer errMsg = new StringBuffer();

  // read error and input streams as this would free up the buffers
  // free the error stream buffer
  Thread errThread = new Thread() {
    @Override
    public void run() {
      try {
        String line = errReader.readLine();
        while((line != null) && !isInterrupted()) {
          errMsg.append(line);
          errMsg.append(System.getProperty("line.separator"));
          line = errReader.readLine();
        }
      } catch(IOException ioe) {
        LOG.warn("Error reading the error stream", ioe);
      }
    }
  };
  try {
    errThread.start();
  } catch (IllegalStateException ise) { }

  try {
    parseExecResult(inReader); // parse the output
    // clear the input stream buffer
    String line = inReader.readLine();
    while(line != null) {
      line = inReader.readLine();
    }
    // wait for the process to finish and check the exit code
    exitCode = process.waitFor();
    try {
      // make sure that the error thread exits
      errThread.join();
    } catch (InterruptedException ie) {
      LOG.warn("Interrupted while reading the error stream", ie);
    }
    completed = true;
    if (exitCode != 0) {
      // Surface the child's stderr in the exception message.
      throw new ExitCodeException(exitCode, errMsg.toString());
    }
  } catch (InterruptedException ie) {
    throw new IOException(ie.toString());
  } finally {
    // close the input stream
    try {
      inReader.close();
    } catch (IOException ioe) {
      LOG.warn("Error while closing the input stream", ioe);
    }
    // If we bailed out early, stop the stderr-drain thread too.
    if (!completed) {
      errThread.interrupt();
    }
    try {
      errReader.close();
    } catch (IOException ioe) {
      LOG.warn("Error while closing the error stream", ioe);
    }
    process.destroy();
    lastTime = System.currentTimeMillis();
  }
}
0 0
- 由TaskTracker启动到Task执行
- Task在TaskTracker上执行环境的准备
- TaskTracker启动过程
- Hadoop jobTracker taskTracker 与task 的联系
- hadoop作业执行之TaskTracker
- MapReduce(七): TaskTracker执行任务
- hadoop作业执行之TaskTracker
- taskTracker和jobTracker 启动失败
- tasktracker不能启动 报Tasktracker disallowed by JobTracker信息
- TaskTracker
- uefi启动解析:由原理到实例
- taskTracker请求任务执行任务的过程
- JobTracker响应TaskTracker心跳及调度task源码级分析
- mapreduce源码分析之TaskTracker的启动
- Spark学习之8:Stage提交到Task执行
- 在Jdeveloper 11gR2中传递url参数到由task flow构成的区中
- 在Jdeveloper 11gR2中传递url参数到由task flow构成的区中
- Hadoop对Map执行框架的实现(TaskTracker端)
- 黑马程序员—集合框架小结
- Openstack rootwrap
- Android之传感器
- Linux 编译 sphinx
- Flex swf 升级后,客户端不需要清空缓存即可查看最新版本的使用说明
- 由TaskTracker启动到Task执行
- 73-特殊回文数的判断
- Google编程风格-值得借鉴
- 兼容性之Dom加入元素
- 译文:Why you choose stainless steel flagpoles
- plan
- Pear编码标准 10、13
- Windows启动过程详解
- ffmpeg 新老接口问题及对照集锦