Hadoop自定义Writable实现二次排序

来源:互联网 发布:李鸿章 袁世凯 知乎 编辑:程序博客网 时间:2024/04/28 18:12

输入数据集

20,75,cqu
25,90,cqnu
20,70,cqupt
24,80,cquk

二次排序功能

先按第一列数字排序,再按第二列数字排序

输出结果

20,70 cqupt
20,75 cqu
24,80 cquk
25,90 cqnu

实现原理

因为MapReduce的输出是按key排序的,所以,我们可以自定义一个key,这个key包含第一列和第二列。在实现compareTo方法时,先按第一列排序,再按第二列排序。

实现代码

import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class TestJoin {    public static class IntPairWritable implements WritableComparable<IntPairWritable>{        private IntWritable first;        IntWritable second;        public IntPairWritable(){            set(new IntWritable(), new IntWritable());        }        public void set(IntWritable first, IntWritable second){            this.first = first;            this.second = second;        }        @Override        public void write(DataOutput out) throws IOException {            this.first.write(out);            this.second.write(out);        }        @Override        public void readFields(DataInput in) throws IOException {            this.first.readFields(in);            this.second.readFields(in);        }        @Override        public int compareTo(IntPairWritable o) {            int result = this.first.compareTo(o.first);            if(result != 0)                return result;            return this.second.compareTo(o.second);        }        public boolean equals(Object o){            if(o instanceof IntPairWritable){                IntPairWritable obj = (IntPairWritable)o;                return this.first.equals(obj.first) && this.second.equals(obj.second);            }            return false;        }     
   public String toString(){            return this.first.toString() + "," + this.second.toString();        }    }    public static class SecondSortMapper extends Mapper<LongWritable, Text , IntPairWritable , Text>{        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{            String array[] = value.toString().split(",");            IntPairWritable keyPair = new IntPairWritable();            keyPair.set(new IntWritable(Integer.valueOf(array[0])),new IntWritable( Integer.valueOf(array[1])));            context.write(keyPair, new Text(array[2]));        }    }    public static class TestJoinReducer extends Reducer<IntPairWritable, Text, IntPairWritable, Text>{        public void reduce(IntPairWritable key, Iterable<Text> value, Context context) throws IOException, InterruptedException{            for(Text v : value){                context.write(key, v);            }        }    }    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {        if(args.length < 2){            System.out.println("args must be three");return ;        }        Configuration conf = new Configuration();        Job job = Job.getInstance(conf,"TestJoin");        job.setJarByClass(TestJoin.class);        job.setMapperClass(SecondSortMapper.class);        job.setReducerClass(TestJoinReducer.class);        job.setOutputKeyClass(IntPairWritable.class);  //        job.setOutputValueClass(Text.class);        FileInputFormat.addInputPath(job, new Path(args[0]));        FileOutputFormat.setOutputPath(job, new Path(args[2]));        System.exit(job.waitForCompletion(true)?0:1);    }}

注意事项

在自定义Writable时,要实现的方法有readFields,write,compareTo三个方法(注意是readFields而不是read)。
同时,如果重写了equals,必须一并重写hashCode,否则默认的HashPartitioner无法保证相同的key被分发到同一个reducer。
除此之外,还要重写toString方法,以便于key可以输出内容。

0 0