mapreduce GroupingComparator mapreduce排序规则和分组规则

来源:互联网 发布:淘客网站源码建立 编辑:程序博客网 时间:2024/06/06 00:43

mapreduce自定义GroupingComparator

1.他的作用:先看图


GroupingComparator作用于reduce端读取已排序数据并分组的阶段。如果不自定义GroupingComparator,当key为bean时,只有两个bean的所有成员变量都相等,它们才会被分到同一组、交给同一次reduce调用。
而自定义GroupingComparator可以在两个bean的成员变量不完全相等时"做手脚",让框架认为这两个bean是相同的key,从而把它们归入同一组。
那么这有什么好处呢?
很简单:把一部分工作提前(在排序、分组阶段)完成,减少reduce端的压力。

2.排序规则和分组规则

2.1如果不自定义public class MyGroupingComparator  extends WritableComparator 的话,排序规则和分组规则是一样的

2.2当我们自定义public class MyGroupingComparator  extends WritableComparator 的话,分组规则就是在排序规则上减少要求:

2.3区别

看个代码片段
@Overridepublic int compareTo(Shopping o) {return this.p_id-o.p_id==0?o.money-this.money:this.p_id-o.p_id;}
这个要求是如果p_id相同的,并不是就都进一个组了,还要money相同才能进一个组里边,
当我们自定义分组规则时候


现在就是p_id相同就进一个组了

2.4细节说明




分析


上图中compareTo规则依次是A->B->C->D

而compare的规则只能从上边的规则后边减少
可能是 ABC
可能是AB
可能是A
也就是说,这就像把水管的闸门慢慢打开,让更多的元素被当成同一组。

3.看个案例

TopN:order_id    p_id    money 
1 1 222
1 5 25
2 2 2000
2 4 122
2 5 722
3 1 222
1 1 1000
1 5 5000
2 3 3000
2 4 4000
2 2 722
3 1 221
需求:按第二个字段(p_id)分组,求每组中第三个字段(money)最大的前两条记录
shopp
package cn.yzx.bigdata.mr.groupingcomparator;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.io.Writable;import org.apache.hadoop.io.WritableComparable;/* * ������Ҫ���ÿһ�������гɽ��������һ�ʽ��� * Order_0000001Pdt_01222.8Order_0000001Pdt_0525.8Order_0000002Pdt_03522.8Order_0000002Pdt_04122.4Order_0000002Pdt_05722.4Order_0000003Pdt_01222.8 */public class Shopping implements WritableComparable<Shopping>{private int order_id;private int p_id;private int money;public Shopping() {}public Shopping(int order_id, int p_id, int money) {this.order_id = order_id;this.p_id = p_id;this.money = money;}public int getOrder_id() {return order_id;}public void setOrder_id(int order_id) {this.order_id = order_id;}public int getP_id() {return p_id;}public void setP_id(int p_id) {this.p_id = p_id;}public int getMoney() {return money;}public void setMoney(int money) {this.money = money;}@Overridepublic String toString() {return order_id + "," + p_id + "," + money;}@Overridepublic void write(DataOutput out) throws IOException {out.writeInt(order_id);out.writeInt(p_id);out.writeInt(money);}@Overridepublic void readFields(DataInput in) throws IOException {order_id=in.readInt();p_id=in.readInt();money=in.readInt();}@Overridepublic int compareTo(Shopping o) {return this.p_id-o.p_id==0?o.money-this.money:this.p_id-o.p_id;}}

MyGroupingComparator


package cn.yzx.bigdata.mr.groupingcomparator;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;public class MyGroupingComparator  extends WritableComparator {protected MyGroupingComparator() {super(Shopping.class,true);}@Overridepublic int compare(WritableComparable a, WritableComparable b) {Shopping abean=(Shopping) a;Shopping bbean=(Shopping) b;return abean.getP_id()-bbean.getP_id();}}

GroupingComparator


package cn.yzx.bigdata.mr.groupingcomparator;import java.io.File;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;/* * private int;private int private int ;\] order_idp_id;money1 1222152523522 */public class GroupingComparator {static class GroupingComparatorMapper extends Mapper<LongWritable, Text, Shopping, NullWritable>{Shopping s=new Shopping();@Overrideprotected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {String line = value.toString();String[] fields = line.split("\t");s.setOrder_id(Integer.parseInt(fields[0]));s.setP_id(Integer.parseInt(fields[1]));s.setMoney(Integer.parseInt(fields[2]));context.write(s, NullWritable.get());}}static class GroupingComparatorReducer extends Reducer<Shopping, NullWritable, Shopping, NullWritable>{private static final int TopN=2;@Overrideprotected void reduce(Shopping Shopping, Iterable<NullWritable> values,Context context)throws IOException, InterruptedException {int count=0;for (NullWritable value:values) {if(count<TopN) {context.write(Shopping, NullWritable.get());count++;}}}}public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration conf = new Configuration();conf.set("mapreduce.framework.name", "local");conf.set("fs.defaultFS", "file:///");conf.set("mapred.textoutputformat.separator", " ");Job job = 
Job.getInstance(conf);job.setJarByClass(GroupingComparator.class);job.setMapperClass(GroupingComparatorMapper.class);job.setReducerClass(GroupingComparatorReducer.class);job.setGroupingComparatorClass(MyGroupingComparator.class);job.setOutputKeyClass(Shopping.class);job.setOutputValueClass(NullWritable.class);FileInputFormat.setInputPaths(job, new Path("C:/mapreduce/Shoppinginput"));Path outpath = new Path("C:/mapreduce/Shoppingoutput");FileSystem fs = FileSystem.get(conf);if(fs.exists(outpath)) {fs.delete(outpath, true);}FileOutputFormat.setOutputPath(job, outpath);boolean res = job.waitForCompletion(true);System.exit(res?0:1);}}




原创粉丝点击