mapreduce编程模型之WritableComparator
来源:互联网 发布:mac按键失灵 编辑:程序博客网 时间:2024/06/06 02:35
WritableComparator是一个类 这个类是用于mapreduce编程模型中的比较 排序
mapreduce中有两次排序 一次是 在环形缓冲区域之中进行分区 排序
还有一次是数据在reduce端获取文件之后进行分组
现在我讲的是后面那次排序,即reduce端获取数据后进行分组时使用的比较器
//Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce
job.setGroupingComparatorClass(MyComparator.class);
上面是我们在定义job时候进行的配置 配置如何进行分组
setGroupingComparatorClass内部的参数是RawComparator
而WritableComparator是实现RawComparator
所以我们直接继承WritableComparator类就可以自己定义一个MyComparator
public static class MyComparator extends WritableComparator {
    public MyComparator() {
        super(Text.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Text a1 = (Text) a;
        Text b1 = (Text) b;
        if (a1.toString().equals("hello") && b1.toString().equals("hello")) {
            return -1;
        } else {
            return 0;
        }
    }
}
上面这段代码我必须说一个坑,坑了我好几个小时,最后在 Stack Overflow 网站上才找到提示
就是那个无参构造子 必须调用父类的构造子并让第二个参数 createInstances 为 true(即 super(Text.class, true)) 不然反序列化时会因 buffer 未初始化而报空指针
通过查找源码 也确实发现了这个问题
// Quoted from Hadoop's WritableComparator source: the three-argument constructor
// that every other constructor ultimately delegates to.
protected WritableComparator(Class<? extends WritableComparable> keyClass,
                             Configuration conf,
                             boolean createInstances) {
    this.keyClass = keyClass;
    // Fall back to a fresh Configuration when the caller passes null.
    this.conf = (conf != null) ? conf : new Configuration();
    if (createInstances) {
        // This branch is the ONLY place key1/key2/buffer are initialized.
        // A subclass whose no-arg constructor does not reach here with
        // createInstances == true leaves buffer == null, which is exactly
        // the NullPointerException described in the article above.
        key1 = newKey();
        key2 = newKey();
        buffer = new DataInputBuffer();
    } else {
        key1 = key2 = null;
        buffer = null;
    }
}
因为从报错的空指针来说 是buffer为空 整个类也就只有这个构造子对buffer进行了初始化
最后来看一下结果
hdfs@yksp005206:/home/jumpserver$ hadoop fs -cat /test/wc/output/part-r-00000
hello value[] hello,
hello value[] hello,
world value[] hello,hellp,hive,kylin,spark,world,
从结果上来看 确实是两个hello没有被分到同一次reduce调用中 而其他的所有单词都被分组到了同一次reduce调用中
package com.hit.ee;/** * Created by zh on 2017/9/28. */import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.*;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;import java.util.StringTokenizer;public class WorldCount2 { public static class TokenizerMapper extends Mapper<Object, Text, Text, Text> { private Text word = new Text(); public void map(Object key, Text value, Context context ) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, word); } } } public static class IntSumReducer extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterable<Text> values, Context context ) throws IOException, InterruptedException { Text text = new Text(); StringBuffer sb = new StringBuffer("value[] "); for (Text value : values) { sb.append(value).append(","); } text.set(sb.toString()); context.write(key, text); } } public static class MyComparator2 implements RawComparator<Text>{ DataInputBuffer buffer = new DataInputBuffer(); @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { try { Text a = new Text(); buffer.reset(b1,s1,l1); a.readFields(buffer); Text b = new Text(); buffer.reset(b2,s2,l2); b.readFields(buffer); return compare(a,b); } catch (IOException e) { e.printStackTrace(); } return -1; } @Override public int compare(Text a1, Text b1) { if (a1.toString().equals("hello") && b1.toString().equals("hello")) { return -1; } else { return 0; } } } public static class MyComparator extends WritableComparator { public MyComparator() { 
super(Text.class,true); } @Override public int compare(WritableComparable a, WritableComparable b) { Text a1 = (Text) a; Text b1 = (Text) b; if (a1.toString().equals("hello") && b1.toString().equals("hello")) { return -1; } else { return 0; } } } public static class MyPartitioner extends Partitioner<Text,Text>{ @Override public int getPartition(Text key, Text value, int numPartitions) { if (key.toString().equals("hello")) return 0; else return 1; } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); FileSystem.get(conf).deleteOnExit(new Path(args[1])); Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WorldCount2.class); job.setMapperClass(TokenizerMapper.class); job.setReducerClass(IntSumReducer.class);// job.setNumReduceTasks(2);// job.setPartitionerClass(MyPartitioner.class); //Define the comparator that controls how the keys are sorted before they are passed to the reducer //job.setSortComparatorClass(MyComparator.class); //Define the comparator that controls which keys are grouped together or a single call to Reducer#reduce job.setGroupingComparatorClass(MyComparator.class); job.setMapOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }}
- mapreduce编程模型之WritableComparator
- Hadoop之MapReduce编程模型
- mapreduce编程模型之partitioner
- hadoop初学之MapReduce编程模型学习
- MapReduce编程模型之InputFormat分析(-)
- MapReduce编程模型之InputFormat分析(二)
- 【MongoDB】MongoDB数据库之MapReduce编程模型
- mapreduce编程模型介绍
- MapReduce编程模型
- MapReduce编程模型
- MapReduce 编程模型
- MapReduce 编程模型概述
- MapReduce编程模型简介
- MapReduce 编程模型概述
- MapReduce编程模型
- MapReduce编程模型概述
- Hadoop MapReduce编程模型
- MapReduce 编程模型概述
- 高通Qualcomm平台lk(light kernel)启动流程2——aboot_init()
- ActiveMQ监控队列状态报javax.management.InstanceNotFoundException
- Oratop工具_实时数据库性能监控工具
- java 获取 T.class
- Data Structure: Segment Tree 线段树
- mapreduce编程模型之WritableComparator
- 【汇总】java常见面试题汇总
- 助解“中等收入陷阱”金融是否大有可为
- android json在线解析
- Oracle SQL
- Javaweb 文件上传到服务器
- jQuery判断元素是否存在的可靠方法
- springboot整合Quartz实现动态配置定时任务
- 第五周项目一 建立顺序栈算法库