mapreduce编程练习(一)简单的练习 WordCount
来源:互联网 发布:软件存储权限 编辑:程序博客网 时间:2024/06/12 13:14
入门训练:WordCount
问题描述:对一个或多个输入文件中的单词进行计数统计。例如,一个输入文件的内容如下:
输出格式:
运行代码实例:
package hadoopLearn;import java.io.IOException;import java.net.URI;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.DoubleWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.P;public class WordCount extends Configured implements Tool {private static double count = 0; public static class CountMapper extends Mapper<LongWritable, Text, Text, LongWritable>{private Text word = new Text();private LongWritable one = new LongWritable(1);@Overrideprotected void map(LongWritable key,Text value,Mapper<LongWritable, Text, Text, LongWritable>.Context context)throws IOException,InterruptedException{System.out.println("line pos:" + key.toString());String line = value.toString();StringTokenizer tokenizer = new StringTokenizer(line);while (tokenizer.hasMoreElements()) {count ++;word.set(tokenizer.nextToken());context.write(word, one);}}}public static class CountReducer extends Reducer<Text, LongWritable, Text, DoubleWritable>{private DoubleWritable result = new DoubleWritable();@Overrideprotected void reduce(Text key, Iterable<LongWritable> values,Reducer<Text, LongWritable, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {int sum = 0;for(LongWritable v : values){sum += v.get();}result.set(sum);context.write(key, result);}}static FileSystem fs = null;static Configuration conf=null;public static void init() throws Exception{//读取classpath下的xxx-site.xml 配置文件,并解析其内容,封装到conf对象中conf = new 
Configuration();//也可以在代码中对conf中的配置信息进行手动设置,会覆盖掉配置文件中的读取的值conf.set("fs.defaultFS", "hdfs://192.168.41.136:9000/");//根据配置信息,去获取一个具体文件系统的客户端操作实例对象 fs = FileSystem.get(new URI("hdfs://192.168.41.136:9000/"),conf,"hadoop");}public int run(String[] args) throws Exception {Job job = Job.getInstance(getConf(),"WordCount");job.setJarByClass(WordCount.class);job.setMapperClass(CountMapper.class);job.setReducerClass(CountReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(LongWritable.class);Path in = new Path("/WordCount/input");if(fs.exists(in)){FileInputFormat.addInputPath(job, in);}else{System.out.println("输入文件不存在!");}Path os = new Path("/WordCount/output");int flage = 0;if(fs.exists(os)){System.out.println("输出文件已经存在!重新新建路径!"); fs.delete(os, true); FileOutputFormat.setOutputPath(job, os); flage = job.waitForCompletion(false) ? 0:1;}else{FileOutputFormat.setOutputPath(job, os);flage = job.waitForCompletion(false) ? 0:1;}return flage;}public static void main(String[] args) throws Exception {init();int res = ToolRunner.run(new WordCount(), args);System.exit(res);}}
阅读全文
0 0
- mapreduce编程练习(一)简单的练习 WordCount
- MapReduce编程练习(一)
- MapReduce练习(一)
- 编程练习(一)
- mapreduce编程练习(二)倒排索引 Combiner的使用以及练习
- 简单的MapReduce程序wordCount
- MapReduce实现简单的Wordcount
- 一些简单的编程练习
- 编程题目的简单练习
- MapReduce算法一、简单求和计数(类似WordCount)
- MapReduce 练习一 找爷孙关系
- MapReduce练习(二)
- MapReduce练习(三)
- DOM编程练习(一)
- 编程练习2(一)
- MapReduce练习一:多MapReduce链接
- 简单汇编编程练习
- MapReduce 的简单例子 WordCount的实现
- Kotlin在Android上的运用(二)
- POJ 1195 Mobile phones 题解
- 20150519-jQuery ajax()
- SDN学习日记—基于RYU的hub开发1
- Windows/Android/IOS WebRTC音视频总结(二)
- mapreduce编程练习(一)简单的练习 WordCount
- Lowest Common Ancestor of a Binary Tree
- 如何看待 Google 最新的系统 Fuchsia?
- 对LOWORD, HIWORD, LOBYTE, HIBYTE的理解
- Unity在导入官方Assets出现“提示上下文不存在SessionState”问题的解决办法
- 第十三天H5进阶
- linux查看和修改PATH环境变量的方法
- 七个用户体验设计的小提示,创建最佳的移动设计
- HTML5之FileReader的使用,兼容IE10+