MR--WordCount的MapReduce程序注释

来源:互联网 发布:建筑工程软件有哪些 编辑:程序博客网 时间:2024/05/29 14:26

程序基于 Hadoop 2.7.4 开发,可运行(下方代码省略了 import 语句)。

public class WordCount {    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{        private final static IntWritable one = new IntWritable(1);        private Text word = new Text();        @Override        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {            //使用StringTokenizer而不使用split()方法是为了提升效率            StringTokenizer itr = new StringTokenizer(value.toString());            while (itr.hasMoreTokens()){                //set text to contain a content of string                word.set(itr.nextToken());                //generate a output pair                context.write(word, one);            }        }    }    public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {        private IntWritable result = new IntWritable();        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {            //计次            int sum = 0;            for (IntWritable val : values) {                sum += val.get();            }            result.set(sum);            context.write(key, result);        }    }    public static void main(String[] args) throws Exception {        Configuration conf = new Configuration();        Job job = Job.getInstance(conf, "word count");        job.setJarByClass(WordCount.class);        job.setMapperClass(TokenizerMapper.class);        //job.setCombinerClass(IntSumReducer.class);        job.setReducerClass(IntSumReducer.class);        job.setOutputKeyClass(Text.class);        job.setOutputValueClass(IntWritable.class);        FileInputFormat.addInputPath(job, new Path(args[0]));        FileOutputFormat.setOutputPath(job, new Path(args[1]));        System.exit(job.waitForCompletion(true) ? 0 : 1);    }}
原创粉丝点击