jobcontrol
来源:互联网 发布:php大数据框架 编辑:程序博客网 时间:2024/05/18 13:08
JobControl 可以将多个 Job 组合起来运行。下面是一个包含两个 Job 的 JobControl 示例:第一个 Job 的输出目录作为第二个 Job 的输入。
package hadoop;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Demonstrates chaining two MapReduce jobs with {@link JobControl}:
 * job2 depends on job1, and job1's output directory is job2's input.
 *
 * <p>Usage: {@code WordCount <input> <intermediate-output> <final-output>}
 *
 * <p>Both stages run the same word-count logic; the second stage exists only
 * to show how {@link ControlledJob#addDependingJob} wires the dependency.
 */
public class WordCount {

    /** Stage-1 mapper: tokenizes each line and emits (word, 1) per token. */
    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        // Reuse writables across records to avoid per-token allocation.
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /** Stage-1 reducer: sums all counts seen for a word. */
    public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /** Stage-2 mapper: identical logic to {@link WordCountMapper}, kept as a
     *  separate class so each stage of the demo has its own mapper. */
    public static class WordCountMapper1 extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /** Stage-2 reducer: identical logic to {@link WordCountReduce}. */
    public static class WordCountReduce1 extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : values) {
                sum += count.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    /**
     * Builds the two jobs, chains them via JobControl, and blocks until both
     * finish.
     *
     * @param args generic Hadoop options followed by three paths:
     *             input, intermediate output (job1 out / job2 in), final output
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] argsValues = new GenericOptionsParser(conf, args).getRemainingArgs();

        JobControl jobControl = new JobControl("jobcontrol");

        // Job 1: raw input -> intermediate directory.
        Job job = Job.getInstance(conf, "word count1");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPaths(job, argsValues[0]);
        FileOutputFormat.setOutputPath(job, new Path(argsValues[1]));

        // Job 2: intermediate directory -> final output.
        Job job2 = Job.getInstance(conf, "word count2");
        job2.setJarByClass(WordCount.class);
        job2.setMapperClass(WordCountMapper1.class);
        job2.setReducerClass(WordCountReduce1.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPaths(job2, argsValues[1]);
        FileOutputFormat.setOutputPath(job2, new Path(argsValues[2]));

        ControlledJob controlledJob = new ControlledJob(conf);
        controlledJob.setJob(job);
        ControlledJob controlledJob2 = new ControlledJob(conf);
        controlledJob2.setJob(job2);
        // job2 will not start until job1 completes successfully.
        controlledJob2.addDependingJob(controlledJob);
        jobControl.addJob(controlledJob);
        jobControl.addJob(controlledJob2);

        // JobControl is a Runnable; run its scheduling loop in its own thread.
        Thread thread = new Thread(jobControl);
        thread.start();

        // Poll with a sleep instead of busy-spinning a CPU core.
        while (!jobControl.allFinished()) {
            Thread.sleep(500);
        }
        System.out.println(jobControl.getSuccessfulJobList());
        jobControl.stop();
    }
}
输出结果为
[hadoop@master local]$ hadoop fs -cat /test/test.txthellohadoophello hi [hadoop@master local]$ hadoop fs -ls /test/outputFound 2 items-rw-r--r-- 1 hadoop supergroup 0 2017-06-20 14:55 /test/output/_SUCCESS-rw-r--r-- 1 hadoop supergroup 22 2017-06-20 14:55 /test/output/part-r-00000[hadoop@master local]$ hadoop fs -cat /test/output/part-r-00000hadoop1hello2hi1[hadoop@master local]$ hadoop fs -ls /test/output1Found 2 items-rw-r--r-- 1 hadoop supergroup 0 2017-06-20 14:57 /test/output1/_SUCCESS-rw-r--r-- 1 hadoop supergroup 30 2017-06-20 14:57 /test/output1/part-r-00000[hadoop@master local]$ hadoop fs -cat /test/output1/part-r-000001221hadoop1hello1hi1
阅读全文
0 0
- jobcontrol
- jobcontrol类
- JobControl的实现原理
- Hadoop JobControl Job迭代
- Hadoop工作流引擎之JobControl
- 如何使用Hadoop的JobControl
- 使用JobControl管理mapreduce多job依赖
- Hadoop的JobControl设计及用法
- 使用JobControl管理mapreduce多job依赖
- JobControl的使用及获取计数器
- Hadoop中JobControl的用法,关于Job迭代
- Hadoop使用JobControl设置job之间的依赖关系
- Hadoop 依赖关系作业 MapReduce JobControl 错误纠正
- MapReduce--7--求共同好友--改进版JobControl
- Portal for arcgis 服务器集成方式区别
- 自写简单下拉列表
- 游标什么时候消失
- sql 取年份后两位和月份
- 高斯噪声/白噪声/高斯白噪声的区别
- jobcontrol
- 第一个Struts2项目(下)
- 文章标题
- Java创建文件夹并上传文件到该文件夹
- Python字符串与数字输出
- 仿微信朋友圈【九宫格的实现】
- 基于c#的双缓存技术绘图
- 从sqlcommandbuilder引出的小想法
- Kotlin 初探