WordCount经典编程
来源:互联网 发布:工程预结算软件 编辑:程序博客网 时间:2024/06/15 22:41
WordCount原理:
MapReduce任务被初始化为一个Job,每个Job又可以分为两个阶段:map阶段和reduce阶段。这两个阶段分别由两个函数实现,即map函数和reduce函数。
/**
 * WordCount.java
 * com.xiaoma.hadoop.mr
 * Copyright (c) 2017, Xiaoma (小马同学). All rights reserved.
 *
 * @author Xiaoma (小马同学)
 */
package com.xiaoma.hadoop.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that counts how many times each whitespace-separated word
 * occurs in the input.
 *
 * <p>Usage: {@code WordCount <input> <output>}. Any existing {@code <output>}
 * path is deleted recursively before the job is submitted.
 *
 * @author Xiaoma (小马同学)
 * @Date 2017-05-16
 */
public class WordCount extends Configured implements Tool {

    /**
     * Mapper: emits {@code (word, 1)} for every token of the input value.
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Reused across map() calls so no new Writable is allocated per token.
        private final Text word = new Text();
        private final LongWritable one = new LongWritable(1);

        /**
         * Splits {@code value} into words and writes {@code (word, 1)} for each.
         *
         * @param key     input key; not used by this mapper
         * @param value   text to tokenize
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // StringTokenizer splits on any run of whitespace (space, tab, CR, LF, FF)
            // and never yields empty tokens. The previous split(" ") emitted an
            // empty-string "word" for blank lines and for leading or repeated
            // spaces, and did not separate tab-delimited words.
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Reducer: sums the counts received for a key and emits {@code (key, sum)}.
     * Input and output types are identical, so it is also registered as the
     * job's combiner in {@link WordCount#run(String[])}.
     */
    public static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        // Reused output holder for the per-key total.
        private final LongWritable valueout = new LongWritable();

        /**
         * Adds up all {@code values} and writes the total under {@code key}.
         *
         * @param key     the word being counted
         * @param values  partial counts for {@code key}
         * @param context sink for the emitted pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable num : values) {
                sum += num.get();
            }
            valueout.set(sum);
            context.write(key, valueout);
        }
    }

    /**
     * Configures and runs the word-count job.
     *
     * @param args exactly two elements: the input path and the output path
     * @return 0 if the job completed successfully; 1 if the argument count is
     *         wrong or the job failed
     * @throws Exception if job setup, output cleanup, or job execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.out.println("必须输入两个参数,<input> <output>");
            return 1;
        }
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        // Create the job from the tool's configuration.
        Job job = Job.getInstance(getConf(), "wordcount");
        // Class used to locate the jar that is shipped with the job.
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        // Summation is associative and commutative and the reducer's input and
        // output types match, so it can pre-aggregate map output as a combiner
        // without changing the final counts.
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Remove a pre-existing output path. The FileSystem is resolved from the
        // path itself so that an output URI with a non-default scheme is handled
        // by the filesystem that actually owns it.
        FileSystem fs = outputPath.getFileSystem(getConf());
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
            System.out.println("out put delete finish");
        }

        // Submit and block until the job finishes, printing progress.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner} and exits with
     * the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, forwarded to {@code ToolRunner.run}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new WordCount(), args);
        System.exit(res);
    }
}
阅读全文
0 0
- WordCount经典编程
- Hadoop入门经典:WordCount
- Hadoop入门经典:WordCount
- Hadoop入门经典:WordCount
- hadoop 经典入门wordcount
- Hadoop入门经典: WordCount程序
- mapreduce编程:wordcount
- MapReduce编程之WordCount
- MapReduce WordCount编程实例
- MapReduce编程(入门篇)-wordcount
- MapReduce编程实例之WordCount
- Mapreduce编程1之WordCount
- Spark 入门经典 WordCount 单机/伪分布式
- hadoop入门经典:wordcount代码详解
- wordcount
- wordcount
- WordCount
- wordCount
- Windows上安装Git
- ijkplayer框架深入剖析
- Android 为apk文件签名,增加修改系统时间等权限
- Android Studio 2.0+ CMake进行JNI开发
- 串——顺序存储结构
- WordCount经典编程
- 高德地图定位和附近功能
- 16进制的简单运算
- 正则表达式不以
- Activity的生命周期
- webview加载网页出现("找不到网页net:err_unknown_url_scheme")
- ijkplayer程序结构整理
- jboss7数据源密码加解密
- 点击获取联系人电话(类型为住址、公司等都可以获取)