老api写法的wordcount程序
来源:互联网 发布:淘宝快递单打印软件 编辑:程序博客网 时间:2024/06/05 03:36
package com.ccse.hadoop.old;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.FileInputFormat;

/**
 * Word count job written against the old ({@code org.apache.hadoop.mapred}) MapReduce API.
 *
 * <p>The job reads text lines, splits each line on whitespace, and writes one
 * {@code word <TAB> count} record per distinct word.
 *
 * @author woshiccna
 */
public class WordCountApp {

    /** Default input location, used when no input path is given on the command line. */
    public final static String INPUT_PATH = "hdfs://chaoren1:9000/mapinput";

    /** Default output location, used when no output path is given on the command line. */
    public final static String OUTPUT_PATH = "hdfs://chaoren1:9000/mapoutput";

    /**
     * Configures and runs the word count job, blocking until it finishes.
     *
     * <p>Any existing content at the output location is deleted recursively before the
     * job is submitted.
     *
     * @param args optional overrides: {@code args[0]} is the input path and {@code args[1]}
     *             is the output path; when absent, {@link #INPUT_PATH} and
     *             {@link #OUTPUT_PATH} are used
     * @throws IOException if the file system cannot be reached or the job fails
     * @throws URISyntaxException if the output location is not a valid URI
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        String inputLocation = (args != null && args.length > 0) ? args[0] : INPUT_PATH;
        String outputLocation = (args != null && args.length > 1) ? args[1] : OUTPUT_PATH;

        JobConf conf = new JobConf(WordCountApp.class);
        conf.setJobName("wordcount");

        // Remove the previous run's output up front; the delete is recursive.
        Configuration config = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(outputLocation), config);
        fileSystem.delete(new Path(outputLocation), true);

        // Input side.
        conf.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(inputLocation));

        // Map side: declare the intermediate (map output) types explicitly.
        conf.setMapperClass(MyMapper.class);
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(LongWritable.class);

        // Summation is associative and commutative and the reducer's input and output
        // types match, so the reducer can also run as a combiner to pre-aggregate
        // map output. Final counts are unchanged.
        conf.setCombinerClass(MyReducer.class);

        // Reduce side: final job output types.
        conf.setReducerClass(MyReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(LongWritable.class);

        // Output side.
        conf.setOutputFormat(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(conf, new Path(outputLocation));

        JobClient.runJob(conf);
    }

    /**
     * Emits {@code (token, 1)} for every whitespace-separated token of each input line.
     */
    public static class MyMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, LongWritable> {

        /** Reused output key; overwritten for each token instead of allocating a new Text. */
        private final Text word = new Text();

        /** Constant count of one emitted with every token. */
        private final LongWritable writable = new LongWritable(1);

        /**
         * Tokenizes one input line and collects a count of one per token.
         *
         * @param key      key of the input record; not used
         * @param value    the line of text to tokenize; a {@code null} value emits nothing
         * @param output   collector receiving the {@code (word, 1)} pairs
         * @param reporter progress reporter; not used
         * @throws IOException if the collector fails to accept a record
         */
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<Text, LongWritable> output, Reporter reporter)
                throws IOException {
            if (value == null) {
                return;
            }
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, writable);
            }
        }
    }

    /**
     * Sums all counts received for a word and emits {@code (word, total)}.
     */
    public static class MyReducer extends MapReduceBase
            implements Reducer<Text, LongWritable, Text, LongWritable> {

        /**
         * Adds up the values for one key and collects the total.
         *
         * @param key      the word being counted
         * @param values   the counts collected for this word
         * @param output   collector receiving the {@code (word, total)} pair
         * @param reporter progress reporter; not used
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void reduce(Text key, Iterator<LongWritable> values,
                OutputCollector<Text, LongWritable> output, Reporter reporter)
                throws IOException {
            long sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new LongWritable(sum));
        }
    }
}
0 0
- 老api写法的wordcount程序
- hadoop的wordcount程序
- scala版本wordcount的几种写法
- Hadoop MapReduce基于新API的WordCount程序运行过程分析
- 改写Hadoop的wordcount程序
- WordCount程序的简单解释
- 入门Hadoop的WordCount程序
- 简单的MapReduce程序wordCount
- API文档的写法
- wordcount程序
- hadoop 的helloworld 的wordcount程序
- hadoop自带的WordCount程序
- hadoop的统计单词程序WordCount
- Hadoop wordcount程序的配置运行
- wordcount例子的程序,附带说明
- WordCount:Hadoop中MapReduce的HelloWorld程序
- Hadoop的第一个程序 wordcount
- 用SBT编译Spark的WordCount程序
- Data Mining UVA1591原题翻译+题解
- Linux Shell 学习笔记(三)--Shell变量
- 学习MFC时遇到宏展开后编译错误
- seam学习之事件+pages.xml
- chmod 权限管理 Centos 开放80端口
- 老api写法的wordcount程序
- Sicily 1034
- 用一个程序总结运算符重载
- iOS中GPS定位
- 【JavaSE】关于foreach
- Linux系统密码忘记后的五种恢复方法
- How to access pixel data of an image
- 分布式系统的Raft算法
- JavaScript实现密码强度验证