多个 MapReduce 作业相互依赖的处理方法完整实例

来源:互联网 发布:python 中英文对照 编辑:程序博客网 时间:2024/06/05 17:01

package org.mahao.mr.iplocation;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.mahao.mr.kpi.KPI;

/**
 * Two chained MapReduce jobs driven by a {@link JobControl}.
 *
 * <p>Job 1 counts occurrences of each remote address and writes, per address,
 * a value of the form {@code <count>+<address string>}. Job 2 reads that
 * output, sums the counts per address string and writes each sum as a
 * percentage of {@link #ipnum}.
 */
public class KPIAddress {

    /**
     * Running total of all counts seen by the second job's mapper; used as the
     * denominator in the second job's reducer.
     *
     * <p>NOTE(review): this is plain static JVM state. The reducer only sees the
     * value accumulated by the mapper when both run in the same JVM (e.g. the
     * local job runner). On a real cluster, tasks run in separate JVMs and the
     * reducer would presumably see 0 here -- confirm, and if so pass the total
     * through the job configuration or a counter instead.
     */
    public static int ipnum = 0;

    /** Mapper of the first job: emits {@code (remote address, 1)} for every valid record. */
    public static class KPIAddressMap extends Mapper<Object, Text, Text, IntWritable> {
        // Reused across calls to avoid allocating a new writable per record.
        private IntWritable one = new IntWritable(1);
        private Text ips = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            KPI kpi = KPI.filterIPs(value.toString());
            if (kpi.isValid()) {
                ips.set(kpi.getRemote_addr());
                context.write(ips, one);
            }
        }
    }

    /**
     * Reducer of the first job: sums the counts for one address and emits
     * {@code (address, "<count>+<result of IPSeeker.getAddress>")}.
     */
    public static class KPIAddressReduce extends Reducer<Text, IntWritable, Text, Text> {
        private final IPSeeker ipseeker = IPSeeker.getInstance();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            String address = ipseeker.getAddress(key.toString());
            int num = 0;
            for (IntWritable value : values) {
                num += value.get();
            }
            // The '+' is the delimiter the second job's mapper splits on.
            context.write(key, new Text(String.valueOf(num) + "+" + address));
        }
    }

    /**
     * Mapper of the second job: parses a line containing {@code <count>+<address>}
     * and emits {@code (address, count)}, adding the count to {@link #ipnum}.
     */
    public static class KPIAddressMap1 extends Mapper<Object, Text, Text, IntWritable> {
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            int sep = line.indexOf("+");

            // Walk back over ALL digits in front of '+'. Taking only the single
            // character before '+' would truncate counts of 10 or more.
            int start = sep;
            while (start > 0 && Character.isDigit(line.charAt(start - 1))) {
                start--;
            }
            if (sep < 0 || start == sep) {
                // No '+' or no count in front of it: skip the line but keep it visible.
                context.getCounter("KPIAddress", "MALFORMED_LINES").increment(1);
                return;
            }

            int num;
            try {
                num = Integer.parseInt(line.substring(start, sep)); // times the ip was seen
            } catch (NumberFormatException e) {
                // Digit run too long for an int.
                context.getCounter("KPIAddress", "MALFORMED_LINES").increment(1);
                return;
            }

            String address = line.substring(sep + 1);
            ipnum += num;
            context.write(new Text(address), new IntWritable(num));
        }
    }

    /**
     * Reducer of the second job: sums the counts for one address string and
     * emits the sum as a percentage of {@link #ipnum}.
     */
    public static class KPIAddressReduce1 extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int num = 0;
            for (IntWritable value : values) {
                num += value.get();
            }
            // See the note on ipnum: the denominator is only meaningful when the
            // mapper ran in this same JVM.
            float percent = (float) num / (float) ipnum * 100;
            context.write(key, new Text(" " + String.valueOf(percent) + "%"));
        }
    }

    /**
     * Configures both jobs, declares job 2 dependent on job 1 and runs them
     * through a {@link JobControl}.
     *
     * @param args {@code args[0]} input path of job 1, {@code args[1]} intermediate
     *             path (output of job 1, input of job 2), {@code args[2]} output
     *             path of job 2
     * @throws Exception if job setup fails or the wait is interrupted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println("Usage: KPIAddress <input> <tempDir> <output>");
            System.exit(2);
        }

        Path tempDir = new Path(args[1]); // intermediate directory between the two jobs
        JobConf conf = new JobConf(KPIAddress.class);

        // Configuration of the first job.
        Job job1 = new Job(conf, "KPIAddress1");
        job1.setJarByClass(KPIAddress.class);
        job1.setMapperClass(KPIAddressMap.class);
        job1.setReducerClass(KPIAddressReduce.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(IntWritable.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(Text.class);

        // Wrap job 1 so it can be managed by the JobControl.
        ControlledJob ctrljob1 = new ControlledJob(conf);
        ctrljob1.setJob(job1);
        FileInputFormat.addInputPath(job1, new Path(args[0]));
        FileOutputFormat.setOutputPath(job1, tempDir);

        // Configuration of the second job.
        Job job2 = new Job(conf, "KPIAddress2");
        job2.setJarByClass(KPIAddress.class);
        job2.setMapperClass(KPIAddressMap1.class);
        job2.setReducerClass(KPIAddressReduce1.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(IntWritable.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        // Wrap job 2 so it can be managed by the JobControl.
        ControlledJob ctrljob2 = new ControlledJob(conf);
        ctrljob2.setJob(job2);

        // Declare the dependency: job 2 starts only after job 1 has completed.
        ctrljob2.addDependingJob(ctrljob1);

        // Job 2 reads what job 1 wrote, so its input must be the same tempDir.
        FileInputFormat.addInputPath(job2, tempDir);
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));

        // Top-level controller that owns both controlled jobs.
        JobControl jobCtrl = new JobControl("myctrl");
        jobCtrl.addJob(ctrljob1);
        jobCtrl.addJob(ctrljob2);

        // JobControl is a Runnable; it must be started on its own thread.
        Thread t = new Thread(jobCtrl);
        t.start();

        // Poll with a pause instead of spinning at full speed on allFinished().
        while (!jobCtrl.allFinished()) {
            Thread.sleep(500);
        }

        System.out.println(jobCtrl.getSuccessfulJobList());

        // allFinished() is also true when jobs failed, so check explicitly.
        boolean failed = !jobCtrl.getFailedJobList().isEmpty();
        if (failed) {
            System.err.println("Failed jobs: " + jobCtrl.getFailedJobList());
        }
        jobCtrl.stop();
        if (failed) {
            System.exit(1);
        }
    }
}


0 0
原创粉丝点击