自定义Writable类型 两个数的排序

来源:互联网 发布:ubuntu网络配置命令 编辑:程序博客网 时间:2024/05/21 05:18

使用Writable 和 WritableComparable接口的区别 :
Writable是对value的封装，可以把多个值封装为一个类
WritableComparable继承自Writable 和 Comparable , 比Writable 多了个 compareTo方法 , 可以在compareTo方法中对数据的key进行排序处理

package com.cyh;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Objects;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Sorts pairs of numbers in DESCENDING order (by first number, then by second)
 * using a custom {@link WritableComparable} as the map output key, so the
 * MapReduce shuffle phase performs the sort.
 *
 * <p>Input: one pair per line, two integers separated by a single space.
 * Output: {@code first<TAB>second}, one pair per line, descending.
 */
public class NumberSort {

    /**
     * Composite key holding both numbers of a pair.
     *
     * <p>{@code compareTo} orders keys in descending order of {@code first},
     * then {@code second}. {@code equals}/{@code hashCode} are overridden and
     * consistent with {@code compareTo}: without a value-based {@code hashCode},
     * the default {@code HashPartitioner} could route equal keys to different
     * reducers when more than one reducer is configured.
     */
    public static class DataBean implements WritableComparable<DataBean> {
        long first;
        long second;

        /** No-arg constructor required by Hadoop's reflective deserialization. */
        public DataBean() {
        }

        // Widened from (int, int) to (long, long): source-compatible (ints widen)
        // and consistent with the long-typed fields.
        public DataBean(long first, long second) {
            this.first = first;
            this.second = second;
        }

        public long getFirst() {
            return first;
        }

        public void setFirst(long first) {
            this.first = first;
        }

        public long getSecond() {
            return second;
        }

        // Widened from int for consistency with setFirst; source-compatible.
        public void setSecond(long second) {
            this.second = second;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            first = in.readLong();
            second = in.readLong();
        }

        /** Descending by {@code first}, then descending by {@code second}. */
        @Override
        public int compareTo(DataBean o) {
            int byFirst = Long.compare(o.first, first); // arguments reversed => descending
            return byFirst != 0 ? byFirst : Long.compare(o.second, second);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof DataBean)) {
                return false;
            }
            DataBean other = (DataBean) obj;
            return first == other.first && second == other.second;
        }

        @Override
        public int hashCode() {
            return Objects.hash(first, second);
        }

        @Override
        public String toString() {
            return first + " " + second;
        }
    }

    /**
     * Parses each input line ("first second") into a {@link DataBean} key.
     * The second number is also emitted as the value so the reducer can write
     * it out without re-parsing the key.
     */
    public static class Map extends Mapper<LongWritable, Text, DataBean, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] fields = line.split(" ");
            // Long.parseLong (was Integer.parseInt) matches the long-typed fields
            // and supports the full value range.
            long first = Long.parseLong(fields[0]);
            long second = Long.parseLong(fields[1]);
            DataBean bean = new DataBean(first, second);
            // Both numbers form the key; the second number alone is the value.
            context.write(bean, new LongWritable(second));
        }
    }

    /**
     * Writes one "first&lt;TAB&gt;second" line per incoming value. When several
     * identical pairs exist, the values iterable holds one entry per duplicate,
     * so duplicates are preserved in the output.
     */
    public static class Re extends Reducer<DataBean, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(DataBean key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long first = key.getFirst();
            for (LongWritable val : values) {
                context.write(new Text(String.valueOf(first)), val);
            }
        }
    }

    /**
     * Configures and runs the job. Input/output paths may be supplied as CLI
     * arguments; when absent, defaults to /input/data and a timestamped
     * /output/ directory. Exits non-zero on job failure.
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Timestamped default output dir avoids "output already exists" failures.
            String time = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"));
            args = new String[]{"/input/data", "/output/" + time};
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 1. Driver class (locates the jar on the cluster).
        job.setJarByClass(NumberSort.class);
        // 2. Map stage.
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(DataBean.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 3. Reduce stage.
        job.setReducerClass(Re.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 4. I/O paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 5. Run synchronously and propagate success/failure to the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

初始数据

10 10
10 9
10 8
1 3
2 3
1 4
5 9
6 9
6 9
6 9
4 4
4 5
4 4
4 5

排序后

10	10
10	9
10	8
6	9
6	9
6	9
5	9
4	5
4	5
4	4
4	4
2	3
1	4
1	3