MapReduce Java Example

package cn.com.cennavi.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class MapReduceUtil {

    // Mapper: splits each input line into tokens and emits (word, 1) per token.
    public static class Map extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }
    }

    // Reducer (also used as combiner): sums the counts for each word.
    public static class Reduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MapReduceUtil.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
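The listing above uses the legacy org.apache.hadoop.mapred API (JobConf, MapReduceBase, OutputCollector). The CDH4 jars listed below come from Hadoop 2.x, which also ships the newer org.apache.hadoop.mapreduce API. For comparison, here is a minimal sketch of the same word count written against the newer API; the class name WordCountNewApi is made up for illustration.

package cn.com.cennavi.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical class name; same word-count logic as MapReduceUtil, new-style API.
public class WordCountNewApi {

    // Mapper: emits (word, 1) for every token in the input line.
    public static class TokenizerMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as combiner): sums the counts for each word.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WordCountNewApi.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}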

JARs used:

hadoop-mapreduce-client-app-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-common-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-core-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-hs-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-hs-plugins-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-jobclient-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-client-jobclient-2.0.0-cdh4.5.0-tests.jar

hadoop-mapreduce-client-shuffle-2.0.0-cdh4.5.0.jar

hadoop-mapreduce-examples-2.0.0-cdh4.5.0.jar
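Note that the mapreduce-client jars alone are not a complete compile classpath: Path, Text, and IntWritable live in hadoop-common in Hadoop 2.x, so that jar (and its dependencies) must also be available. Once the class is packaged into a jar, the job is typically submitted with the hadoop launcher; the jar name and HDFS paths below are placeholders:

hadoop jar wordcount.jar cn.com.cennavi.test.MapReduceUtil /user/hadoop/input /user/hadoop/output

The output directory must not already exist, otherwise FileOutputFormat rejects the job at submission time.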
