MapReduce---chain链条式操作
来源:互联网 发布:jsp如何引入java类 编辑:程序博客网 时间:2024/04/30 18:20
准备数据:
hello world of tom1
hello world of tom1
hello world of tom2
hello world of tom3
hello world of tom3
hello world of tom4
hello world of tom4
思路分析图:
1、Mapper1(切割单词)
2、Mapper2(滤掉单词中的of)
3、Mapper2_2(滤掉单词中带tom的)
4、Mapper3(reduce的输出是Mapper3的输入,滤掉出现次数不大于1次的单词)
1、Mapper1(切割单词)
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/** * */public class Mapper1 extends Mapper<LongWritable,Text,Text,IntWritable>{protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {System.out.println("map1 : " + value.toString());String line = value.toString();String[] arr = line.split(" ");for(String w : arr){context.write(new Text(w),new IntWritable(1));}}}
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/** * mapper */public class Mapper2 extends Mapper<Text,IntWritable,Text,IntWritable>{protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {System.out.println("map2 : " + key.toString());String w = key.toString();if(!w.equals("of")){context.write(key,value);}}}
3、Mapper2_2(滤掉单词中带tom的)
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/** * mapper */public class Mapper2_2 extends Mapper<Text,IntWritable,Text,IntWritable>{protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {System.out.println("map2_2 : " + key.toString());String w = key.toString();if(!w.startsWith("tom")){context.write(key,value);}}}
4、Reducer1
package hadoop.mr.chain;import hadoop.join.reduce.CombKey;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;import java.util.Iterator;/** * reduce1 */public class Reducer1 extends Reducer<Text,IntWritable,Text,IntWritable>{protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {int count = 0 ;for(IntWritable w: values){count = count + w.get() ;}context.write(key,new IntWritable(count));System.out.println("redu : " + key.toString() + " = " + count);}}
5、Mapper3(reduce的输出是Mapper3的输入,滤掉出现次数不大于1次的单词)
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;/** * mapper */public class Mapper3 extends Mapper<Text,IntWritable,Text,IntWritable>{protected void map(Text key, IntWritable value, Context context) throws IOException, InterruptedException {System.out.println("map3 : " + key.toString());int cnt = value.get();if(cnt > 1){context.write(key,value);}}}
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;/** * reduce1 */public class Combiner1 extends Reducer<Text,IntWritable,Text,IntWritable>{protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {int count = 0 ;for(IntWritable w: values){count = count + w.get() ;}context.write(key,new IntWritable(count));System.out.println("Combiner1 : " + key.toString() + " = " + count);}}
7、MyPartitioner
package hadoop.mr.chain;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Partitioner;/** * 分区 */public class MyPartitioner extends Partitioner<Text,IntWritable>{public int getPartition(Text text, IntWritable intWritable, int numPartitions) {System.out.println("par : " + text.toString());return (text.hashCode() & Integer.MAX_VALUE )% numPartitions;}}
8、App
package hadoop.mr.chain;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;/** */public class App {public static void main(String[] args) throws Exception {args = new String[]{"d:/java/mr/data/1.txt", "d:/java/mr/out"} ;Configuration conf = new Configuration();FileSystem fs = FileSystem.get(conf);if(fs.exists(new Path(args[1]))){fs.delete(new Path(args[1]),true);}Job job = Job.getInstance(conf);job.setJobName("chain");job.setJarByClass(App.class);//添加mapper1ChainMapper.addMapper(job,Mapper1.class, LongWritable.class,Text.class,Text.class,IntWritable.class,conf);//添加mapper1ChainMapper.addMapper(job,Mapper2.class, Text.class,IntWritable.class,Text.class,IntWritable.class,conf);ChainMapper.addMapper(job,Mapper2_2.class, Text.class,IntWritable.class,Text.class,IntWritable.class,conf);//设置reducer(***1***)ChainReducer.setReducer(job,Reducer1.class,Text.class,IntWritable.class,Text.class,IntWritable.class,conf);//在reduce后增加一个环节ChainReducer.addMapper(job, Mapper3.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);FileInputFormat.addInputPath(job,new Path(args[0]));FileOutputFormat.setOutputPath(job,new Path(args[1]));//分区job.setPartitionerClass(MyPartitioner.class);job.setCombinerClass(Combiner1.class);job.setNumReduceTasks(3);job.waitForCompletion(true) ;}}
阅读全文
0 0
- MapReduce---chain链条式操作
- 响应者链条-(What is responder chain)
- 链条式编程
- Problem A. Snapper Chain 问题A.按扣链条 解决办法
- Hadoop MapReduce进阶 使用Chain
- Hadoop MapReduce进阶 使用Chain
- Google Code Jam 2010 Qualification Round 资格赛 Problem A. Snapper Chain 问题A.按扣链条
- How to chain multiple MapReduce jobs in Hadoop
- Hadoop,MapReduce操作Mysql
- MapReduce操作HBase
- MapReduce之Join操作
- MapReduce实现join操作
- Hadoop,MapReduce操作Mysql
- java操作mongodb mapreduce
- MapReduce操作HBase
- Hadoop,MapReduce操作Mysql
- MapReduce操作HBase
- Hadoop,MapReduce操作Mysql
- yii2表单get多次提交时一直在地址重复拼接参数
- 阻塞非阻塞与同步异步的区别
- 对定义局部变量位置的思考
- Dockerfile中RUN bash -c 'touch /app.jar'是干嘛的
- react ref 属性
- MapReduce---chain链条式操作
- 2017.8.18---------树状数组逆序+离散
- 数据结构数据结构系统学习之路
- 使用istream对象作为条件
- 代码块
- 在ubuntu 16.04上安装docker
- 个人总结(1)
- 接口的幂等性设计
- (转载)ztree 添加节点的图标无法正常显示解决方法