hadoop中combine的简单使用《转》

来源:互联网 发布:三星note8网络设置 编辑:程序博客网 时间:2024/06/16 04:39
combine函数把一个map函数产生的多个<key,value>对合并成一个新的<key2,value2>,再将新的<key2,value2>作为reduce函数的输入。combine函数的格式与reduce函数相同。
例如:将3个文件中的数值相加。
file1: 1 2 3
file2: 4 5 6
file3: 7 8 9

public class MyMapre06 {
    public static class Map extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, Text> {

        private Text word = new Text();
        private Text val = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            String bignum = new StringBuffer(line).toString();

            word.set("1");
            val.set(bignum);
            output.collect(word, val);
        }

    }

    public static class Reduce extends MapReduceBase implements
            Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            BigInteger num = BigInteger.valueOf(0);
            String tmp = new String();
            Text v = new Text();

            while (values.hasNext()) // 计算同一个key下,所有value的总和
            {
                tmp = values.next().toString();
                num = num.add(new BigInteger(tmp));
            }

            String res = new StringBuffer(num.toString()).toString();
            v.set(res);
            output.collect(key, v); // 收集reduce输出结果
        }
    }

    public static class Combiner extends MapReduceBase implements
            Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            BigInteger num = BigInteger.valueOf(0);
            String tmp = new String();
            Text v = new Text();

            while (values.hasNext()) // 计算同一个key下,所有value的总和
            {
                tmp = values.next().toString();
                num = num.add(new BigInteger(tmp));
            }

            v.set(num.toString());
            output.collect(key, v); // 收集reduce输出结果
        }
    }

    public static void main(String[] args) throws Exception {

        JobConf conf = new JobConf(MyMapre06.class);
        conf.setJobName("Sum");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Combiner.class);   //使用combiner函数   
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

经过 Combiner函数, file1 为  6, file2 为 15, file3 为 24
经过 Reduce函数, 输出 key 为 1 value 为 45

原创粉丝点击