Hadoop WordCount代码
来源:互联网 发布:内容管理系统cms 开源 编辑:程序博客网 时间:2024/05/01 15:57
import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class WordCount { public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key, Text value, Context context ) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); } } } public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, Context context ) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key, result); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2); } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }}
0 0
- Hadoop入门WordCount代码
- hadoop wordcount 代码
- Hadoop WordCount代码
- hadoop wordcount代码事例详解
- Hadoop的WordCount代码解析
- hadoop之WordCount代码编写
- Hadoop分布式WordCount代码详解
- hadoop学习之WordCount.java代码解读
- hadoop学习之WordCount.java代码解读
- hadoop学习之WordCount.java代码解读
- hadoop自带wordcount代码详解
- Hadoop入门—WordCount代码分析
- hadoop入门经典:wordcount代码详解
- hadoop自带wordcount代码详解
- hadoop入门程序WordCount代码详解
- hadoop wordcount
- hadoop wordcount
- hadoop-wordcount
- 深度学习(二十)基于Overfeat的图片分类、定位、检测
- LibGDX_6.4: 常用系统控件: 复选框(CheckBox)
- C语言常见错误
- 推荐!手把手教你使用Git
- eclipse debug不了 Cannot connect to VM
- Hadoop WordCount代码
- PHP面向对象 1.6 常见的关键字
- 聚类分析:基本概念梳理
- android初学笔记,TextView及其派生组件的基本用法
- LibGDX_6.5: 常用系统控件: 文本框(TextField)
- 57,字符串的基本知识
- Linux c 算法与数据结构--栈
- 【Python】Python 三种导入模块的方法和区别
- android四大组件--ContentProvider详解