数据算法-hadoop2 二次排序

来源:互联网 发布:5230软件下载 编辑:程序博客网 时间:2024/06/02 19:42

二次排序

输入文件

每行格式:年,月,日,温度

2000,12,04,10
2000,11,01,20
2000,12,12,-20
2000,11,07,30
2000,12,24,-40
2012,12,21,30
2012,12,22,-20
2012,12,23,60
2012,12,24,70
2012,12,25,10
2013,01,22,80
2013,01,23,90
2013,01,24,70
2013,01,20,-10

按照我之前的写法,是先把年月作为 key 发送到 reduce 端,再在 reduce 中把温度放入 list 进行排序;这种方法在 reduce 端同一个 key 的数据量太大时会内存溢出。

这里改用组合 key(年月 + 日 + 温度),让框架在 shuffle 阶段按组合 key(先年月、后温度)完成排序。
GroupingComparator 之前没用过,查了一下它的作用:只按组合 key 中的一个字段(也就是 yearMonth)进行分组,使同一年月的记录进入同一次 reduce 调用。

        // Wire up the secondary-sort job: mapper, reducer, key/value types,
        // partitioner and grouping comparator.
        job.setMapperClass(SecondarySortMapper.class);
        job.setReducerClass(SecondarySortReducer.class);

        // Intermediate (map output) types: composite key plus the temperature.
        job.setMapOutputKeyClass(DateTemperaturePair.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Final (reduce output) types: SecondarySortReducer emits Text/Text.
        // Fully qualified so this fragment does not rely on an extra import.
        job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
        job.setOutputValueClass(org.apache.hadoop.io.Text.class);

        // Partition by yearMonth only, so every record of one month reaches the
        // same reducer; group by yearMonth only, so they share one reduce() call.
        job.setPartitionerClass(DateTemperatureParititioner.class);
        job.setGroupingComparatorClass(DateTemperatureGroupingComparator.class);
public class SecondarySortMapper extends Mapper<Object, Text, DateTemperaturePair, IntWritable> {    public void map(Object key, Text value, Context context)            throws IOException, InterruptedException {        String line =value.toString();        String[] tokens = line.split(",");        String yearMonth = tokens[0] + tokens[1];        String day = tokens[2];        int temperature = Integer.parseInt(tokens[3]);        DateTemperaturePair reduceKey = new DateTemperaturePair();        reduceKey.setYearMonth(new Text(yearMonth));        reduceKey.setDay(new Text(day));        reduceKey.setTemperature(new IntWritable(temperature));        context.write(reduceKey, new IntWritable(temperature));    }}
/**
 * Reducer of the secondary-sort job.
 *
 * <p>Receives all temperatures of one group and writes them as a single
 * comma-terminated list keyed by the group's yearMonth.
 */
public class SecondarySortReducer extends
        Reducer<DateTemperaturePair, IntWritable, Text, Text> {

    /**
     * Concatenates the temperatures of one group, in the order the framework
     * delivers them, appending a comma after every value (including the last).
     *
     * @param key     composite key of the group; only its yearMonth is emitted
     * @param values  temperatures belonging to the group
     * @param context task context used to emit the result
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(DateTemperaturePair key,
            Iterable<IntWritable> values, Context context) throws IOException,
            InterruptedException {
        StringBuilder joined = new StringBuilder();
        for (IntWritable temperature : values) {
            joined.append(temperature.get()).append(',');
        }
        Text temperatures = new Text(joined.toString());
        context.write(key.getYearMonth(), temperatures);
    }
}
public class DateTemperaturePair implements Writable,        WritableComparable<DateTemperaturePair> {    private Text yearMonth = new Text();    private Text day = new Text();    private IntWritable temperature = new IntWritable();    @Override    public int compareTo(DateTemperaturePair pair) {        int compareValue = this.yearMonth.compareTo(pair.yearMonth);        if (compareValue == 0) {            compareValue = temperature.compareTo(pair.temperature);        }        return compareValue;        // -1*compareValue    }    @Override    public void readFields(DataInput in) throws IOException {        yearMonth.readFields(in);        day.readFields(in);        temperature.readFields(in);    }    @Override    public void write(DataOutput out) throws IOException {        yearMonth.write(out);        day.write(out);        temperature.write(out);    }    public Text getYearMonth() {        return yearMonth;    }    public void setYearMonth(Text yearMonth) {        this.yearMonth = yearMonth;    }    public Text getDay() {        return day;    }    public void setDay(Text day) {        this.day = day;    }    public IntWritable getTemperature() {        return temperature;    }    public void setTemperature(IntWritable temperature) {        this.temperature = temperature;    }}
public class DateTemperatureParititioner extends        Partitioner<DateTemperaturePair, Text> {    @Override    public int getPartition(DateTemperaturePair pair, Text text, int number) {        return Math.abs(pair.getYearMonth().hashCode() % number);    }}
/**
 * Grouping comparator for the secondary-sort job.
 *
 * <p>Compares two {@link DateTemperaturePair} keys by their yearMonth only, so
 * keys that differ in day or temperature still compare as equal here.
 */
public class DateTemperatureGroupingComparator extends WritableComparator {

    /** Registers {@link DateTemperaturePair} as the key class to compare. */
    public DateTemperatureGroupingComparator() {
        super(DateTemperaturePair.class, true);
    }

    /**
     * Compares the yearMonth fields of the two keys.
     *
     * @param wc1 first key; must be a {@link DateTemperaturePair}
     * @param wc2 second key; must be a {@link DateTemperaturePair}
     * @return the result of comparing the first key's yearMonth to the second's
     */
    @Override
    public int compare(WritableComparable wc1, WritableComparable wc2) {
        DateTemperaturePair left = (DateTemperaturePair) wc1;
        DateTemperaturePair right = (DateTemperaturePair) wc2;
        return left.getYearMonth().compareTo(right.getYearMonth());
    }
}

输出结果

200011  20,30,
200012  -40,-20,10,
201212  -20,10,30,60,70,
201301  -10,70,80,90,
原创粉丝点击