/*
 * Hadoop MapReduce built-in WordCount implementation.
 * (Source: web article; originally published on programbbs.net, 2024/06/10 13:46.)
 */
package com.bruce.mapreduce;import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class WordCount {// step 1: Map Class/** * Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> *  */public static class WordCountMapper extendsMapper<LongWritable, Text, Text, IntWritable> {private Text mapOutputKey = new Text();private final static IntWritable mapOutputValue = new IntWritable(1);@Overrideprotected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {// TODO Auto-generated method stub//line valueString lineValue = value.toString();//splitStringTokenizer stringTokenizer = new StringTokenizer(lineValue);//iteratorwhile(stringTokenizer.hasMoreElements()){//get valueString wordValue = stringTokenizer.nextToken();//set valuemapOutputKey.set(wordValue);//outputcontext.write(mapOutputKey, mapOutputValue);}}}// step 2: Reduce Class/** * Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> *  */public static class WordCountReducer extendsReducer<Text, IntWritable, Text, IntWritable> {private IntWritable outputValue = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// TODO Auto-generated method stub//sum tmpint sum = 0;//iteratorfor(IntWritable value: values){//totalsum += value.get();}//set valueoutputValue.set(sum);//outputcontext.write(key, outputValue);}}// step 3: Driver ,component jobpublic int run(String[] args) throws Exception {// 1: get configrationConfiguration configuration = new Configuration();// 2: 
create JobJob job = Job.getInstance(configuration, this.getClass().getSimpleName());// run jarjob.setJarByClass(this.getClass());// 3: set job// input -> map -> reduce -> output// 3.1 inputPath inPath = new Path(args[0]);FileInputFormat.addInputPath(job, inPath);// 3.2: mapjob.setMapperClass(WordCountMapper.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);// 3.3: reducejob.setReducerClass(WordCountReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);// 3.4: outputPath outPath = new Path(args[1]);FileOutputFormat.setOutputPath(job, outPath);// 4: submit jobboolean isSuccess = job.waitForCompletion(true);return isSuccess ? 0 : 1;}//step 4: run programpublic static void main(String[] args) throws Exception {int status = new WordCount().run(args);System.exit(status);}}

// (page footer residue from the original web scrape: 原创粉丝点击)