MR代码实例-wordcount

来源:互联网 发布:电脑的网络图标不见了 编辑:程序博客网 时间:2024/05/16 10:10
package com.tiger.test;


import java.io.IOException;
import java.util.StringTokenizer;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Logger;


public class WordCountTest {
private static final Logger log = Logger.getLogger(WordCountTest.class);
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
 private final static IntWritable one = new IntWritable(1);
 private Text word = new Text();
 public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
 log.info("Map key : " + key);
  log.info("Map value : " + value);
      StringTokenizer itr = new StringTokenizer(value.toString());
  while (itr.hasMoreTokens()) {
     String wordStr = itr.nextToken();
      word.set(wordStr);
     log.info("Map word : " + wordStr);
      context.write(word, one);
      }
 }
}

public static class IntSumReducer extends
     Reducer<Text, IntWritable, Text, IntWritable> {
 private IntWritable result = new IntWritable();


 public void reduce(Text key, Iterable<IntWritable> values,
         Context context) throws IOException, InterruptedException {
     log.info("Reduce key : " + key);
     log.info("Reduce value : " + values);
     int sum = 0;
     for (IntWritable val : values) {
         sum += val.get();
     }
     result.set(sum);
     log.info("Reduce sum : " + sum);
     context.write(key, result);
 }
}


public static void main(String[] args) throws Exception {
 Configuration conf = new Configuration();
 String[] otherArgs = new GenericOptionsParser(conf, args)
         .getRemainingArgs();
 if (otherArgs.length != 2) {
     System.err.println("Usage: WordCountTest <in> <out>");
     System.exit(2);
 }


 Job job = new Job(conf, "word count");
 job.setJarByClass(WordCountTest.class);


 job.setMapperClass(TokenizerMapper.class);
 job.setCombinerClass(IntSumReducer.class);
 job.setReducerClass(IntSumReducer.class);
 job.setOutputKeyClass(Text.class);
 job.setOutputValueClass(IntWritable.class);


 FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
 FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));


 System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}
0 0
原创粉丝点击