多个MR一起执行的Driver代码

来源:互联网 发布:总统和主席的区别知乎 编辑:程序博客网 时间:2024/06/06 10:01
package com.it18zhang.day05.flow5;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver that chains two MapReduce jobs with {@link JobControl}.
 *
 * <p>Job 1 ("join1") reads {@code args[0]} and writes {@code args[1]}; job 2 ("Join2") reads
 * {@code args[1]} and writes {@code args[2]}. Job 2 is declared as depending on job 1, so it is
 * only started after job 1 has completed.
 *
 * <p>Both output directories are deleted recursively before the jobs are submitted.
 */
public class MODLE {

    /** Delay between two polls of {@link JobControl#allFinished()}, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 500L;

    /**
     * Entry point.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path of the first job (also
     *     the input of the second job), {@code args[2]} output path of the second job
     * @throws Exception if job setup fails or the waiting thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println("Usage: MODLE <input> <intermediate-output> <final-output>");
            System.exit(2);
            return;
        }

        Path input = new Path(args[0]);
        Path intermediate = new Path(args[1]);
        Path output = new Path(args[2]);

        JobConf conf = new JobConf(MODLE.class);

        // Remove stale output directories; job submission fails if they already exist.
        FileSystem fs = FileSystem.get(conf);
        fs.delete(intermediate, true);
        fs.delete(output, true);

        ControlledJob ctrljob1 = new ControlledJob(conf);
        ctrljob1.setJob(buildFirstJob(conf, input, intermediate));

        ControlledJob ctrljob2 = new ControlledJob(conf);
        ctrljob2.setJob(buildSecondJob(conf, intermediate, output));

        // Job 2 consumes job 1's output, so it must not start before job 1 has completed.
        ctrljob2.addDependingJob(ctrljob1);

        // Top-level controller that owns both jobs.
        JobControl jobCtrl = new JobControl("myctrl");
        jobCtrl.addJob(ctrljob1);
        jobCtrl.addJob(ctrljob2);

        if (!runToCompletion(jobCtrl)) {
            System.exit(1);
        }
    }

    /**
     * Configures the first job: FlowMapper / FlowReduce with Text keys and FlowBean values.
     *
     * @param conf base configuration
     * @param in input path
     * @param out output path
     * @return the configured, not yet submitted job
     * @throws IOException if the job cannot be created or its paths cannot be set
     */
    private static Job buildFirstJob(JobConf conf, Path in, Path out) throws IOException {
        Job job = Job.getInstance(conf, "join1");
        job.setJarByClass(MODLE.class);

        job.setMapperClass(FlowMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setReducerClass(FlowReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);
        return job;
    }

    /**
     * Configures the second job: FlowSortMapper / FlowSortReduce. The map output uses FlowBean as
     * key and Text as value (presumably so the shuffle orders records by FlowBean - confirm against
     * FlowBean's comparison logic); the final output is Text / FlowBean again.
     *
     * @param conf base configuration
     * @param in input path (the first job's output)
     * @param out output path
     * @return the configured, not yet submitted job
     * @throws IOException if the job cannot be created or its paths cannot be set
     */
    private static Job buildSecondJob(JobConf conf, Path in, Path out) throws IOException {
        Job job = Job.getInstance(conf, "Join2");
        job.setJarByClass(MODLE.class);

        job.setMapperClass(FlowSortMapper.class);
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(FlowSortReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);
        return job;
    }

    /**
     * Runs the controller on its own thread and blocks until every job has finished.
     *
     * <p>The list of successful jobs is printed to stdout; failed jobs, if any, go to stderr.
     *
     * @param jobCtrl controller holding the jobs to run
     * @return {@code true} if no job ended up in the failed list
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private static boolean runToCompletion(JobControl jobCtrl) throws InterruptedException {
        // JobControl is a Runnable; nothing is submitted until it runs on a thread.
        Thread runner = new Thread(jobCtrl, "jobcontrol-runner");
        runner.start();
        try {
            // Sleep between polls instead of spinning on allFinished().
            while (!jobCtrl.allFinished()) {
                Thread.sleep(POLL_INTERVAL_MS);
            }
            System.out.println(jobCtrl.getSuccessfulJobList());

            // allFinished() is also true when jobs failed, so check explicitly.
            if (!jobCtrl.getFailedJobList().isEmpty()) {
                System.err.println("Failed jobs: " + jobCtrl.getFailedJobList());
                return false;
            }
            return true;
        } finally {
            // Always stop the controller so its non-daemon thread cannot keep the JVM alive.
            jobCtrl.stop();
        }
    }
}