jobcontrol

来源:互联网 发布:php大数据框架 编辑:程序博客网 时间:2024/05/18 13:08

jobcontrol可以实现多个job结合起来运行。下面就是有两个job的jobcontrol,第一个job的输出是第二个job的输入。

package hadoop;import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;import scala.reflect.generic.Trees.New;public class WordCount {public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable>{private static final IntWritable Number = new IntWritable(1);private Text word = new Text();@Overrideprotected void map(Object key, Text value, Context context)throws IOException, InterruptedException {StringTokenizer stringTokenizer = new StringTokenizer(value.toString());while(stringTokenizer.hasMoreTokens()){String string = stringTokenizer.nextToken();word.set(string);context.write(word, Number);}}}public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>{@Overrideprotected void reduce(Text key, Iterable<IntWritable> vlaues,Context context) throws IOException, InterruptedException {int num=0;for(IntWritable intWritable:vlaues){num+=intWritable.get();}context.write(key, new IntWritable(num));}}public static class WordCountMapper1 extends Mapper<Object, Text, Text, IntWritable>{private static final IntWritable Number = new IntWritable(1);private Text word = new Text();@Overrideprotected void map(Object key, Text value, Context 
context)throws IOException, InterruptedException {StringTokenizer stringTokenizer = new StringTokenizer(value.toString());while(stringTokenizer.hasMoreTokens()){String string = stringTokenizer.nextToken();word.set(string);context.write(word, Number);}}}public static class WordCountReduce1 extends Reducer<Text, IntWritable, Text, IntWritable>{@Overrideprotected void reduce(Text key, Iterable<IntWritable> vlaues,Context context) throws IOException, InterruptedException {int num=0;for(IntWritable intWritable:vlaues){num+=intWritable.get();}context.write(key, new IntWritable(num));}}public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration conf = new Configuration();String[] argsValues = new GenericOptionsParser(conf, args).getRemainingArgs();JobControl jobControl = new JobControl("jobcontrol");Job job = new Job(conf, "word count1");  job.setJarByClass(WordCount.class);job.setMapperClass(WordCountMapper.class);job.setReducerClass(WordCountReduce.class);        job.setOutputKeyClass(Text.class);        job.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPaths(job, argsValues[0]);FileOutputFormat.setOutputPath(job, new Path(argsValues[1]));Job job2 = new Job(conf, "word count2");  job2.setJarByClass(WordCount.class);job2.setMapperClass(WordCountMapper1.class);job2.setReducerClass(WordCountReduce1.class);job2.setOutputKeyClass(Text.class);job2.setOutputValueClass(IntWritable.class);FileInputFormat.addInputPaths(job2, argsValues[1]);FileOutputFormat.setOutputPath(job2, new Path(argsValues[2]));ControlledJob controlledJob = new ControlledJob(conf);controlledJob.setJob(job);ControlledJob controlledJob2 = new ControlledJob(conf);controlledJob2.setJob(job2);controlledJob2.addDependingJob(controlledJob);jobControl.addJob(controlledJob);jobControl.addJob(controlledJob2);Thread thread = new 
Thread(jobControl);thread.start();while(true){if(jobControl.allFinished()){System.out.println(jobControl.getSuccessfulJobList());jobControl.stop();break;}}}}

输出结果为

[hadoop@master local]$ hadoop fs -cat /test/test.txt
hello
hadoop
hello hi
[hadoop@master local]$ hadoop fs -ls /test/output
Found 2 items
-rw-r--r--   1 hadoop supergroup          0 2017-06-20 14:55 /test/output/_SUCCESS
-rw-r--r--   1 hadoop supergroup         22 2017-06-20 14:55 /test/output/part-r-00000
[hadoop@master local]$ hadoop fs -cat /test/output/part-r-00000
hadoop	1
hello	2
hi	1
[hadoop@master local]$ hadoop fs -ls /test/output1
Found 2 items
-rw-r--r--   1 hadoop supergroup          0 2017-06-20 14:57 /test/output1/_SUCCESS
-rw-r--r--   1 hadoop supergroup         30 2017-06-20 14:57 /test/output1/part-r-00000
[hadoop@master local]$ hadoop fs -cat /test/output1/part-r-00000
1	2
2	1
hadoop	1
hello	1
hi	1


原创粉丝点击