JobControl的使用及获取计数器

来源:互联网 发布:管理系统常用算法 编辑:程序博客网 时间:2024/05/18 13:45

JobControl的使用


1.JobControl配置

//1.首先声明一个JobControlJobControl jobControl = new JobControl("groupName");//2.然后写conf的配置Configuration conf = new Configuration();conf.set("name","value");...省略配置......一般这里会判断输出路径是否存在...FileSystem fileSystem = FileSystem.get(new Configuration());if (fileSystem.exists(new Path(outputPath))) {                LOG.warn("output: " + outputPath + " already exists! DELETE");                fileSystem.delete(new Path(outputPath), true); }//3.开始写job配置Job job = Job.getInstance(conf);            trackViewJob.setJarByClass(xxx.class);            trackViewJob.setJobName("jobName");            trackViewJob.setInputFormatClass(OrcNewInputFormat.class);            for(String date: dateList) {                //这里最好加一个输入路径是否存在的判断                MultipleInputs.addInputPath(job, new Path(inputPath), RCFileMapReduceInputFormat.class, TrackDataMapper.class);            }            job.setMapperClass(TrackDataMapper.class);            job.setReducerClass(TrackDataReducer.class);            job.setMapOutputKeyClass(TextTuple.class);            job.setMapOutputValueClass(TextTuple.class);            job.setNumReduceTasks(5000);            job.setOutputKeyClass(TextTuple.class);            job.setOutputValueClass(TextTuple.class);            job.setOutputFormatClass(SequenceFileOutputFormat.class);            SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));//4.job写完,创建ControlledJob            //job加入ControlledJob                        ControlledJob cj = new ControlledJob(conf);            cj.setJob(job);//5.如果有其他依赖,则把依赖写在这里,例如            cj.addDependingJob(other_cj1);            cj.addDependingJob(other_cj2);            //表示当前cj依赖于其他other_cj完成,他才能提交运行,有多个就add多个            //如果他不需要其他程序运行完在运行,则这里可以不写//6.把刚创建的 ControlledJob加入 JobControl            jobControl.addJob(cj);以上完成JobControl配置下面运行

2.JobControl运行

应该有直接运行的方法。下面是自己写的方法
直接贴代码了,不多说。

调用方法:
// `job` here is the JobControl instance (runJobControll takes a JobControl as
// its first argument), not a single MapReduce Job; `true` enables verbose logging.
boolean res = RunTool.runJobControll(job, true);

package cn.com.xiaoxiang.common.tools.mapreduce;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;import org.apache.log4j.Logger;import java.io.IOException;import java.util.HashMap;import java.util.List;import java.util.Map;/** * Created by xiaoxiang on 08/08/2017. */public class RunTool {    private static final Logger LOG = Logger.getLogger(RunTool.class);    public static boolean runJobControll(JobControl jobControl, boolean verbose)            throws InterruptedException, IOException {        LOG.info("start to run job control with model" + (verbose ? " verbose" : " clean"));        Thread runningThread = new Thread(jobControl);        runningThread.start();        Map<String, Integer> jobProgress = null;        if (verbose) {            jobProgress = new HashMap<String, Integer>();        }        int numWait = -1;        int numRunning = -1;        int numSuccess = -1;        int numFailed = -1;              while (!jobControl.allFinished()) {            Thread.sleep(10 * 1000);            if (verbose) {                int wait = jobControl.getWaitingJobList().size();                int running  = jobControl.getRunningJobList().size();                int success = jobControl.getSuccessfulJobList().size();                int failed = jobControl.getFailedJobList().size();                if (numWait != wait) {                    numWait = wait;                    LOG.info("job control state alert -- waiting jobs: " + numWait);                }                if (numRunning != running) {                    numRunning = running;                    LOG.info("job control state alert -- running jobs: " + numRunning);                }                if (numSuccess != success) {                    numSuccess = success;                    LOG.info("job control state alert -- successful jobs: " + numSuccess);                }                if (failed != 
numFailed) {                    numFailed = failed;                    LOG.info("job control state alert -- failed jobs: " + numFailed);                }                if (failed > 0) {                    jobControl.stop();                    LOG.info("some controlled job failed! stop the job control");                    LOG.info("stop all running jobs");                    for (ControlledJob wcj: jobControl.getRunningJobList()) {                        LOG.info("killing job: " + wcj.getJobName());                        wcj.killJob();                    }                    break;                }                for (ControlledJob cj: jobControl.getRunningJobList()) {                    String jobId = cj.getJobID();                    Job job = cj.getJob();                    int currentJobProgress = (int) (100 * (0.5 * job.mapProgress() + 0.5 * job.reduceProgress()));                    if (!jobProgress.containsKey(jobId) || jobProgress.get(jobId) != currentJobProgress) {                        LOG.info("Controlled Job Alert -- job: " + job.getJobName() +                                ", progress: " + currentJobProgress + "%" +                                ", track url: " + job.getTrackingURL());                        jobProgress.put(jobId, currentJobProgress);                    }                }            }        }        List<ControlledJob> failedList = jobControl.getFailedJobList();        for (ControlledJob fcj: failedList) {            LOG.error("job: " + fcj.getJobName() + " failed!");        }        return failedList.size() == 0;    }}

3.JobControl成功或失败判断

if (res) {
    // Success: read the counters of every job that completed successfully.
    // (`job` is the JobControl that was passed to RunTool.runJobControll.)
    List<ControlledJob> finishList = job.getSuccessfulJobList();
    for (ControlledJob controlledJob : finishList) {
        Counters counters = controlledJob.getJob().getCounters();
        if (counters == null) {
            // Job.getCounters() may return null (for example once the job has been
            // retired), so skip this job instead of failing with a NullPointerException.
            LOG.warn("counters unavailable for job: " + controlledJob.getJobName());
            continue;
        }

        // Debug variant: dump every counter of every group, whatever it is.
        Iterable<String> gcList = counters.getGroupNames(); // names of all counter groups
        for (String counter_name : gcList) {
            CounterGroup gc = counters.getGroup(counter_name);
            cnt.add(counter_name + "# start ~~~~~");
            for (Counter counter : gc) {
                cnt.add(counter.getName() + "#" + counter.getValue());
                LOG.info(counter.getName() + "\t" + counter.getValue());
            }
            cnt.add(counter_name + "# end ~~~~~");
        }

        /* Final variant: only the user-defined group holding the per-stage and
           final output statistics.
        CounterGroup gc = counters.getGroup(Consts.COUNTER_NAME);
        for (Counter counter : gc) {
            cnt.add( counter.getName() +"#"+counter.getValue());
            LOG.info( counter.getName() + "\t" + counter.getValue() );
        }
        */
    }
} else {
    // Failure: collect the names of the jobs that failed.
    List<ControlledJob> failedList = job.getFailedJobList();
    for (ControlledJob fail : failedList) {
        failed.add(fail.getJob().getJobName());
        LOG.info("### Failed:\t" + fail.getJob().getJobName());
    }
}

如果是多线程,那么需要一个锁。

    // Record the outcome and the finished flag together while holding this
    // object's monitor.
    // NOTE(review): readers presumably synchronize on the same object when
    // they read isSucc / isFinished -- confirm.
    synchronized (this) {
        isSucc = res;
        isFinished = true;
    }

获取状态的一些方法

4.JobControl获取计数器值

代码在3中都有,既可以获取指定计数器的值,也可以获取所有mapreduce 计数器的值

Iterable<String> gcList = counters.getGroupNames(); //获取所有计数器名字,这个包括了mapreduce所有的计数器,内部计数器那些                        for (String counter_name : gcList) {                                //又起要注意,这里迭代出的是所有计数器                                //不止用户自定义的计数器,还包括程序输入输出等一些内部计数器                        }
原创粉丝点击