mapreduce代码备忘
来源:互联网 发布:php 分割 编辑:程序博客网 时间:2024/05/23 14:49
import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class WordCount{ public static class TokenizerMapper extends Mapper<Object,Text,Text,IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key, Text value, Context context) throws IOException,InterruptedException{ StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()){ word.set(itr.nextToken()); context.write(word,one); } } } public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable>{ private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException,InterruptedException{ int sum = 0; for (IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key,result); } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2); } Job job = new Job(conf,"word count"); job.setJarByClass(WordCount.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job,new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job,new Path(otherArgs[1])); 
System.exit(job.waitForCompletion(true)?0:1); }}
编译java源代码
[hadoop@localhost code]$ javac -classpath /usr/local/hadoop/hadoop-core-1.2.1.jar:/usr/local/hadoop/lib/commons-cli-1.2.jar WordCount.java
打包
[hadoop@localhost code]$ jar -cvf wordcount.jar *.class
执行程序
[hadoop@localhost hadoop]$ ./bin/hadoop jar ./code/wordcount.jar WordCount input/ output/
查看结果
[hadoop@localhost hadoop]$ ./bin/hadoop fs -cat output/*
0 0
- mapreduce代码备忘
- 备忘代码
- 代码备忘
- 代码备忘
- 代码备忘
- MYSQL出错代码[备忘]
- jsp 个人代码备忘
- css代码一段,备忘
- silverlight 常用代码备忘
- File操作代码备忘
- C++代码功能块备忘
- Qt 常用代码 备忘
- 代码备忘:数据库游标
- android特别代码备忘
- 代码备忘:常用SQL
- PMON代码分析备忘
- 工具代码备忘
- Android数据库代码备忘
- Freemarker实现页面静态化cms
- Could not execute auto check for display colors using command /usr/bin/xdpyinfo. Check if the DISPLA
- string数据乱码表示对应
- USACO 1.3 Ski Course Design
- 12.3.1 用迭代器转换序列
- mapreduce代码备忘
- Makefile经典教程(掌握这些足够)
- 05-自定义构造方法和description方法(%@带打印一个对象的时候,会调用这个方法)
- linux shell comm命令的使用
- Android基础_Activity
- Jackson将json字符串转换成泛型List
- IOS 视图层次管理
- NYOJ 115 城市平乱(Dijkstra入门)
- MySQL中sql查询某一列数据是否以汉字或非汉字开头