Hadoop之——Combiner编程
来源:互联网 发布:centos mail 发送邮件 编辑:程序博客网 时间:2024/06/04 18:04
转载请注明出处:http://blog.csdn.net/l1028386804/article/details/46135857
一、Mapper类的实现
/** * KEYIN即k1表示行的偏移量 * VALUEIN即v1表示行文本内容 * KEYOUT即k2表示行中出现的单词 * VALUEOUT即v2表示行中出现的单词的次数,固定值1 */static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable>{protected void map(LongWritable k1, Text v1, Context context) throws java.io.IOException ,InterruptedException {final String[] splited = v1.toString().split("\t");for (String word : splited) {context.write(new Text(word), new LongWritable(1));System.out.println("Mapper输出<"+word+","+1+">");}};}
二、Reducer类的实现
/** * KEYIN即k2表示行中出现的单词 * VALUEIN即v2表示行中出现的单词的次数 * KEYOUT即k3表示文本中出现的不同单词 * VALUEOUT即v3表示文本中出现的不同单词的总次数 * */static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable>{protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s, Context ctx) throws java.io.IOException ,InterruptedException {//显示次数表示redcue函数被调用了多少次,表示k2有多少个分组System.out.println("MyReducer输入分组<"+k2.toString()+",...>");long times = 0L;for (LongWritable count : v2s) {times += count.get();//显示次数表示输入的k2,v2的键值对数量System.out.println("MyReducer输入键值对<"+k2.toString()+","+count.get()+">");}ctx.write(k2, new LongWritable(times));};}
三、Combiner类的实现
static class MyCombiner extends Reducer<Text, LongWritable, Text, LongWritable>{protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s, Context ctx) throws java.io.IOException ,InterruptedException {//显示次数表示redcue函数被调用了多少次,表示k2有多少个分组System.out.println("Combiner输入分组<"+k2.toString()+",...>");long times = 0L;for (LongWritable count : v2s) {times += count.get();//显示次数表示输入的k2,v2的键值对数量System.out.println("Combiner输入键值对<"+k2.toString()+","+count.get()+">");}ctx.write(k2, new LongWritable(times));//显示次数表示输出的k2,v2的键值对数量System.out.println("Combiner输出键值对<"+k2.toString()+","+times+">");};}
四、完整代码
package combine;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word count job that uses a combiner.
 *
 * <p>Q: Why use a combiner?<br>
 * A: The combiner runs on the map side and reduces the data there, so less data is
 * sent to the reduce side, transfer time drops and the whole job finishes sooner.
 *
 * <p>Q: Why is the combiner an optional step rather than a standard part of every job?<br>
 * A: Not every algorithm is suitable for a combiner, for example computing an average.
 *
 * <p>Q: The combiner already performs a reduce operation, so why is the reduce phase still needed?<br>
 * A: The combiner runs on the map side and only sees the data handled by one map task;
 * it cannot work across map tasks. Only the reducer receives the output of several map tasks.
 */
public class WordCountApp {

    static final String INPUT_PATH = "hdfs://liuyazhuang:9000/hello";
    static final String OUT_PATH = "hdfs://liuyazhuang:9000/out";

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the file system cannot be reached or job submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Remove output left over from a previous run.
        final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        final Path outPath = new Path(OUT_PATH);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }

        final Job job = new Job(conf, WordCountApp.class.getSimpleName());
        // Names the jar to ship, so the nested mapper/combiner/reducer classes can be
        // loaded when the job runs on a cluster rather than locally.
        job.setJarByClass(WordCountApp.class);

        // 1.1 Where the input is read from.
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        // How the input is parsed: every line becomes one key/value pair.
        // job.setInputFormatClass(TextInputFormat.class);

        // 1.2 The custom map class.
        job.setMapperClass(MyMapper.class);
        // Types of the map output <k,v>. May be omitted when <k3,v3> has the same types as <k2,v2>.
        // job.setMapOutputKeyClass(Text.class);
        // job.setMapOutputValueClass(LongWritable.class);

        // 1.3 Partitioning.
        // job.setPartitionerClass(HashPartitioner.class);
        // Run a single reduce task.
        // job.setNumReduceTasks(1);

        // 1.4 TODO sorting and grouping.

        // 1.5 Combining.
        job.setCombinerClass(MyCombiner.class);

        // 2.2 The custom reduce class.
        job.setReducerClass(MyReducer.class);
        // Types of the reduce output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 2.3 Where the output is written to.
        FileOutputFormat.setOutputPath(job, outPath);
        // Output format class.
        // job.setOutputFormatClass(TextOutputFormat.class);

        // Submit the job and wait; report failure through the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Word-count mapper: splits every input line on tab characters and emits
     * {@code <word, 1>} for each non-empty token.
     *
     * <ul>
     *   <li>KEYIN (k1): offset of the line</li>
     *   <li>VALUEIN (v1): text content of the line</li>
     *   <li>KEYOUT (k2): a word that appears in the line</li>
     *   <li>VALUEOUT (v2): number of occurrences of that word, fixed at 1</li>
     * </ul>
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count written with every word; never mutated, so it can be shared. */
        private static final LongWritable ONE = new LongWritable(1);

        /** Reused output key, avoiding one allocation per emitted record. */
        private final Text word = new Text();

        @Override
        protected void map(LongWritable k1, Text v1, Context context)
                throws IOException, InterruptedException {
            for (String token : v1.toString().split("\t")) {
                // A blank line (or two adjacent tabs) yields an empty token; it is not a word.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
                System.out.println("Mapper输出<" + token + "," + 1 + ">");
            }
        }
    }

    /**
     * Word-count reducer: adds up the counts received for one word and writes the total.
     *
     * <ul>
     *   <li>KEYIN (k2): a word that appears in a line</li>
     *   <li>VALUEIN (v2): number of occurrences carried by one incoming pair</li>
     *   <li>KEYOUT (k3): a distinct word of the text</li>
     *   <li>VALUEOUT (v3): total number of occurrences of that word</li>
     * </ul>
     */
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text k2, Iterable<LongWritable> v2s, Context ctx)
                throws IOException, InterruptedException {
            // Printed once per reduce() call, i.e. once per k2 group.
            System.out.println("MyReducer输入分组<" + k2.toString() + ",...>");
            long times = 0L;
            for (LongWritable count : v2s) {
                times += count.get();
                // Printed once per incoming <k2, v2> pair.
                System.out.println("MyReducer输入键值对<" + k2.toString() + "," + count.get() + ">");
            }
            ctx.write(k2, new LongWritable(times));
        }
    }

    /**
     * Combiner: sums the counts of one word and emits the partial total, logging
     * every group, every incoming pair and every outgoing pair.
     */
    static class MyCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text k2, Iterable<LongWritable> v2s, Context ctx)
                throws IOException, InterruptedException {
            // Printed once per reduce() call, i.e. once per k2 group.
            System.out.println("Combiner输入分组<" + k2.toString() + ",...>");
            long times = 0L;
            for (LongWritable count : v2s) {
                times += count.get();
                // Printed once per incoming <k2, v2> pair.
                System.out.println("Combiner输入键值对<" + k2.toString() + "," + count.get() + ">");
            }
            ctx.write(k2, new LongWritable(times));
            // Printed once per outgoing <k2, v2> pair.
            System.out.println("Combiner输出键值对<" + k2.toString() + "," + times + ">");
        }
    }
}
五、运行结果
0 0
- Hadoop之——Combiner编程
- Hadoop之——Combiner编程
- Hadoop学习笔记—8.Combiner与自定义Combiner
- hadoop之 mapreduce Combiner
- Hadoop组件之Combiner
- Hadoop 之 Combiner 与自定义 Combiner
- hadoop细节——shuffle和combiner
- MapReduce编程之Combiner
- Hadoop详解(四)——Shuffle原理,Partitioner分区原理,Combiner编程,常见的MR算法
- Hadoop之combiner和partitioner
- Hadoop之combiner和partitioner
- Hadoop之Combiner与自定义Combiner(笔记8)
- 学习Hadoop第十六课(Combiner编程)
- Combiner类和Partitioner类——hadoop
- hadoop-combiner
- hadoop之shuffle------>soft和combiner
- 辛星笔记之Hadoop权威指南第三篇combiner
- Hadoop之MapReduce的Combiner详解(三)
- android 初识socket通信--java程序做服务器
- 2015年上半年信息系统项目管理师上午真题及答案
- 20150528
- bestcoders Happy Birthday
- 谈谈如何设计秒杀服务
- Hadoop之——Combiner编程
- Linux 信号机制 (二)
- fzu2154 YesOrNo
- 安装根证书
- Visual Studio 2013中在IE浏览器浏览localhost网站时候,发现会不断有下面链接的请求
- ucore操作系统lab1实验准备知识
- 【HDU】5244 inverse【打表找规律——FFT】
- 轻量级javaEE SSH 02: jsp servlet
- 《鸟哥的Linux私房菜》读书笔记:软件安装:RPM,SRPM和YUM功能