MapReduce Algorithm II: Data Deduplication (HashSet)
package MRDemo;

import java.io.IOException;
import java.util.HashSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ProductKind {

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: ProductKind <input path> <output path>");
            System.exit(1);
        }

        Job job = Job.getInstance(new Configuration(), "ProductKind");
        job.setJarByClass(ProductKind.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(ProductCountMap.class);
        job.setReducerClass(ProductCountReduce.class);

        // Map output: <province, kind>; final output: <province, distinct kind count>
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Mapper: for every valid 6-field, tab-separated record, emit (province, product kind).
    public static class ProductCountMap extends Mapper<LongWritable, Text, Text, Text> {
        private final Text province = new Text();
        private final Text kind = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length == 6) {
                kind.set(fields[0].trim());      // field 0: product kind
                province.set(fields[4].trim());  // field 4: province
                context.write(province, kind);
            }
        }
    }

    // Reducer: collect each province's kinds into a HashSet to drop duplicates,
    // then write the number of distinct kinds.
    public static class ProductCountReduce extends Reducer<Text, Text, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            HashSet<String> kinds = new HashSet<String>();
            for (Text value : values) {
                kinds.add(value.toString());
            }
            if (!kinds.isEmpty()) {
                context.write(key, new IntWritable(kinds.size()));
            }
        }
    }
}
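For reference, here is a minimal way to run the job and the record layout it expects. The jar name, paths, and sample values below are illustrative assumptions, not from the original post; the only facts taken from the code are that records are tab-separated with 6 fields, field 0 is the product kind, and field 4 is the province.

    hadoop jar MRDemo.jar MRDemo.ProductKind /data/products /data/products_out

    Sample input (tab-separated, 6 fields; unused fields shown as "..."):
    phone   ... ... ... Guangdong   ...
    phone   ... ... ... Guangdong   ...
    laptop  ... ... ... Guangdong   ...

    Expected output for that input (distinct kinds per province):
    Guangdong   2

Because all kinds for one province must fit in the reducer's HashSet, this approach assumes the number of distinct kinds per province is modest; for very high cardinality a two-stage job (dedup first, then count) would be safer.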