WritableComparator in the MapReduce Programming Model


WritableComparator is a class used in the MapReduce programming model for comparing and sorting keys.

MapReduce compares keys in two places: the first is on the map side, when records are partitioned and sorted in the circular spill buffer;

the second is on the reduce side, when the fetched map outputs are grouped before each reduce call.

This post is about the latter, the reduce-side grouping.
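These two phases are controlled by two separate hooks on Job. As a quick sketch (the comparator class names here are placeholders, not classes from this post):

// Sort comparator: controls the order keys are sorted in, both during the
// map-side spill/merge and the reduce-side merge.
job.setSortComparatorClass(MySortComparator.class);

// Grouping comparator: controls which adjacent (already sorted) keys are
// handed to a single Reducer#reduce call.
job.setGroupingComparatorClass(MyGroupingComparator.class);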

//Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce
job.setGroupingComparatorClass(MyComparator.class);

This is the configuration we set when defining the job; it tells the framework how to group keys.

The parameter of setGroupingComparatorClass must be a class that implements RawComparator,

and WritableComparator implements RawComparator,

so we can define our own MyComparator simply by extending WritableComparator. (The full listing at the end also includes MyComparator2, which implements RawComparator directly.)

public static class MyComparator extends WritableComparator {
    public MyComparator() {
        // createInstances = true is essential: it allocates the key instances
        // and the DataInputBuffer used to deserialize the raw key bytes.
        super(Text.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Text a1 = (Text) a;
        Text b1 = (Text) b;
        // Nonzero means "different group": two adjacent "hello" keys are split
        // into separate reduce calls; every other pair of adjacent keys
        // returns 0 and is grouped together.
        if (a1.toString().equals("hello") && b1.toString().equals("hello")) {
            return -1;
        } else {
            return 0;
        }
    }
}
About the code above, I have to call out a pitfall that cost me several hours; I finally found the hint on Stack Overflow.

The no-argument constructor must call the superclass constructor with super(Text.class, true); otherwise you get a NullPointerException, because the internal buffer is never initialized.
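A minimal sketch of the pitfall (the class names are mine; this assumes a Hadoop 2.x WritableComparator, which has a protected no-argument constructor):

// Broken: the implicit super() call leaves key1, key2 and buffer as null,
// so the framework's byte-level compare() throws a NullPointerException.
public static class BrokenComparator extends WritableComparator {
    public BrokenComparator() {
        // no super(Text.class, true) here -- buffer is never created
    }
}

// Fixed: createInstances = true makes the superclass allocate key1, key2
// and the DataInputBuffer used to deserialize the raw key bytes.
public static class FixedComparator extends WritableComparator {
    public FixedComparator() {
        super(Text.class, true);
    }
}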

Digging into the source confirms it:

protected WritableComparator(Class<? extends WritableComparable> keyClass,
                             Configuration conf,
                             boolean createInstances) {
  this.keyClass = keyClass;
  this.conf = (conf != null) ? conf : new Configuration();
  if (createInstances) {
    key1 = newKey();
    key2 = newKey();
    buffer = new DataInputBuffer();
  } else {
    key1 = key2 = null;
    buffer = null;
  }
}

The NullPointerException says buffer is null, and this constructor is the only place in the whole class where buffer gets initialized.
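Concretely, the NPE surfaces in WritableComparator's byte-level compare, which deserializes both keys through buffer before delegating to the object-level compare. From the Hadoop 2.x source (quoted from memory, lightly abridged; exact lines vary by version):

@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
  try {
    buffer.reset(b1, s1, l1);   // NPE here if buffer was never created
    key1.readFields(buffer);
    buffer.reset(b2, s2, l2);
    key2.readFields(buffer);
    buffer.reset(null, 0, 0);   // drop the reference to the byte array
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return compare(key1, key2);
}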


Finally, let's look at the result:

hdfs@yksp005206:/home/jumpserver$ hadoop fs -cat /test/wc/output/part-r-00000
hello value[] hello,
hello value[] hello,
world value[] hello,hellp,hive,kylin,spark,world,

From the output, the hello keys were indeed not grouped into a single reduce call, while all the other words were handed to the same reduce call.
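To see why the output looks like this, remember that the grouping comparator is only applied to adjacent keys in sorted order. Here is a small offline simulation of the framework's grouping decisions (GroupingTrace is a throwaway name of mine, not part of the job; it assumes the input contained three occurrences of hello, which is consistent with the output above):

import org.apache.hadoop.io.Text;

public class GroupingTrace {
    public static void main(String[] args) {
        String[] sorted = {"hello", "hello", "hello", "hellp", "hive", "kylin", "spark", "world"};
        WorldCount2.MyComparator cmp = new WorldCount2.MyComparator();
        for (int i = 1; i < sorted.length; i++) {
            Text prev = new Text(sorted[i - 1]);
            Text cur = new Text(sorted[i]);
            // Nonzero starts a new group (a new reduce call);
            // 0 keeps cur in the current group.
            String decision = cmp.compare(prev, cur) != 0 ? "new group" : "same group";
            System.out.println(prev + " vs " + cur + " -> " + decision);
        }
    }
}

This yields three groups: {hello}, {hello}, and {hello, hellp, hive, kylin, spark, world}, exactly matching the three output lines. The third line is keyed world rather than hello because Hadoop reuses the key object while the reducer iterates over the values, so by the time the output is written the key holds the last key of the group.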



For completeness, here is the full program:

package com.hit.ee;

/**
 * Created by zh on 2017/9/28.
 */

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

public class WorldCount2 {

    // Emits each word as both key and value, so the reducer can show
    // which raw keys ended up in each group.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, Text> {
        private Text word = new Text();

        public void map(Object key, Text value, Context context
        ) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, word);
            }
        }
    }

    // Concatenates all values of a group, making the grouping visible.
    public static class IntSumReducer
            extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values,
                           Context context
        ) throws IOException, InterruptedException {
            Text text = new Text();
            StringBuffer sb = new StringBuffer("value[] ");
            for (Text value : values) {
                sb.append(value).append(",");
            }
            text.set(sb.toString());
            context.write(key, text);
        }
    }

    // Alternative: implement RawComparator directly instead of extending
    // WritableComparator; here we manage our own DataInputBuffer.
    public static class MyComparator2 implements RawComparator<Text> {
        DataInputBuffer buffer = new DataInputBuffer();

        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            try {
                Text a = new Text();
                buffer.reset(b1, s1, l1);
                a.readFields(buffer);
                Text b = new Text();
                buffer.reset(b2, s2, l2);
                b.readFields(buffer);
                return compare(a, b);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return -1;
        }

        @Override
        public int compare(Text a1, Text b1) {
            if (a1.toString().equals("hello") && b1.toString().equals("hello")) {
                return -1;
            } else {
                return 0;
            }
        }
    }

    public static class MyComparator extends WritableComparator {
        public MyComparator() {
            super(Text.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            Text a1 = (Text) a;
            Text b1 = (Text) b;
            if (a1.toString().equals("hello") && b1.toString().equals("hello")) {
                return -1;
            } else {
                return 0;
            }
        }
    }

    public static class MyPartitioner extends Partitioner<Text, Text> {
        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            if (key.toString().equals("hello"))
                return 0;
            else
                return 1;
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem.get(conf).deleteOnExit(new Path(args[1]));
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WorldCount2.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
//        job.setNumReduceTasks(2);
//        job.setPartitionerClass(MyPartitioner.class);
        //Define the comparator that controls how the keys are sorted before they are passed to the reducer
        //job.setSortComparatorClass(MyComparator.class);
        //Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce
        job.setGroupingComparatorClass(MyComparator.class);
        job.setMapOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
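To reproduce the run above, package the class and submit it along these lines (the jar name and input path are placeholders; the output path matches the hadoop fs -cat command earlier):

hadoop jar wordcount2.jar com.hit.ee.WorldCount2 /test/wc/input /test/wc/output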