MapReduce Programming: the Combiner

A Combiner pre-aggregates each mapper's output before it is sent across the network, i.e. it performs a local reduce on the map side. Because the combiner here simply reuses the reducer logic, this is only safe when the reduce operation is commutative and associative, as summation is in word count.
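To make the effect concrete, here is a small illustration (the input line is hypothetical, not from the original post). Suppose one mapper processes the line "hello hello world":

    map output (before the combiner):   (hello,1) (hello,1) (world,1)
    combiner output (sent to shuffle):  (hello,2) (world,1)

The reducer receives fewer records, which cuts down the data transferred during the shuffle.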

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CombinerApp {

    /**
     * Map: read the input file line by line
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        LongWritable one = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // each line of input received
            String line = value.toString();
            // split the line on the space delimiter
            String[] words = line.split(" ");
            for (String word : words) {
                // emit the map result through the context
                context.write(new Text(word), one);
            }
        }
    }

    /**
     * Reduce: the merge/aggregation step
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                // sum the occurrences of this key
                sum += value.get();
            }
            // emit the final count for this key
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Driver: wires together all the information for the MapReduce job
     */
    public static void main(String[] args) throws Exception {
        // create the Configuration
        Configuration configuration = new Configuration();

        // clean up the output directory if it already exists
        Path outputPath = new Path(args[1]);
        FileSystem fileSystem = FileSystem.get(configuration);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
            System.out.println("output path already existed and has been deleted");
        }

        // create the Job
        Job job = Job.getInstance(configuration, "wordcount");

        // set the class that carries the job
        job.setJarByClass(CombinerApp.class);

        // set the input path for the job
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // map-side settings
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // reduce-side settings
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // set the combiner class: logically identical to our reducer, it runs
        // a local reduce over each mapper's output before the shuffle
        job.setCombinerClass(MyReducer.class);

        // set the output path for the job
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
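To run the job, a typical invocation might look like the following (a sketch: the jar name wordcount.jar and the HDFS paths are placeholders, not part of the original post):

    hadoop jar wordcount.jar CombinerApp /input/wc /output/wc

With the default single reducer, the word counts appear in part-r-00000 under the output directory.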