MapReduce 基本模版与WordCount代码
来源:互联网 发布:淘宝帐号可以注销吗 编辑:程序博客网 时间:2024/06/12 01:20
一. 基本模版
package com.ibeifeng.bigdata.senior.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Skeleton MapReduce job: empty Mapper and Reducer bodies plus a complete
 * driver. Fill in {@code map} and {@code reduce} to create a concrete job.
 */
public class WordCountMapReduce {

    /** Step 1: Mapper — consumes (byte offset, line) pairs; body left for the implementor. */
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // template: implement per-record logic here
        }
    }

    /** Step 2: Reducer — consumes (word, counts) groups; body left for the implementor. */
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // template: implement per-group aggregation here
        }
    }

    /**
     * Step 3: Driver — configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (suitable for System.exit)
     */
    public int run(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // Wire up input and output locations.
        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Mapper emits (Text, IntWritable); declare it so the framework
        // can serialize intermediate records.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reducer produces the final (Text, IntWritable) output.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Submit and block until completion; 'true' enables progress logging.
        boolean completed = job.waitForCompletion(true);
        return completed ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Run the job and propagate its status as the process exit code.
        int exitCode = new WordCountMapReduce().run(args);
        System.exit(exitCode);
    }
}
二. WordCount
package com.ibeifeng.bigdata.senior.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Classic WordCount: counts occurrences of each whitespace-separated token
 * in the input files and writes (word, count) pairs to the output directory.
 */
public class WordCountMapReduce {

    /**
     * Step 1: Mapper — splits each input line into tokens and emits
     * (token, 1) for every non-empty token.
     */
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused output key (the word); reusing Writables avoids per-record allocation.
        private Text mapOutputKey = new Text();
        // Constant value: each occurrence counts as 1.
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // NOTE: the original printed every record via System.out.println,
            // which cripples throughput on real input; removed.
            String lineValue = value.toString();

            // FIX: split on runs of whitespace instead of a single space.
            // split(" ") produces empty tokens for consecutive/leading spaces,
            // which would have been counted as the word "".
            String[] strs = lineValue.split("\\s+");

            // Emit (word, 1) for each real token.
            for (String str : strs) {
                if (str.isEmpty()) {
                    continue; // skip the empty token split() yields on a leading delimiter
                }
                mapOutputKey.set(str);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    /**
     * Step 2: Reducer — sums the counts for each word and emits the total.
     * Also safe to use as a combiner since addition is associative and commutative.
     */
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused output value holding the accumulated count.
        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Accumulate all partial counts for this word.
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    /**
     * Step 3: Driver — configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (suitable for System.exit)
     */
    public int run(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(WordCountMapReduce.class);

        // Wire up input and output locations.
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);

        // Mapper emits (Text, IntWritable) intermediate records.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Combiner: pre-aggregate on the map side to cut shuffle traffic.
        // Safe here because the reduce function is a pure sum.
        job.setCombinerClass(WordCountReducer.class);

        // Reducer produces the final (Text, IntWritable) output.
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Submit and block until completion; 'true' enables progress logging.
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // FIX: the original unconditionally overwrote command-line arguments
        // with hard-coded HDFS paths, making the jar unusable from the CLI.
        // Keep the hard-coded paths only as a fallback for IDE runs.
        if (args.length < 2) {
            args = new String[] {
                // default input path
                "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/input",
                // default output path
                "hdfs://hadoop-senior01.ibeifeng.com:8020/user/beifeng/output3"
            };
        }
        // Run the job and propagate its status as the process exit code.
        int status = new WordCountMapReduce().run(args);
        System.exit(status);
    }
}
0 0
- MapReduce 基本模版与WordCount代码
- MapReduce WordCount 代码初探
- MapReduce 新旧WordCount 代码解读
- mapreduce代码整理之wordcount
- MapReduce基本原理与WordCount程序
- MapReduce之WordCount、FlowCount代码汇总
- mapreduce学习笔记-wordcount代码实现
- Mapreduce wordCount
- MapReduce WordCount
- MapReduce WordCount
- wordCount MapReduce
- 学习Hadoop MapReduce与WordCount例子分析
- 5. MapReduce 结构与wordcount编程案例
- Hadoop之Mapreduce------>入门级程序WordCount代码编写
- 初学Hadoop之图解MapReduce与WordCount示例分析
- 初学Hadoop之图解MapReduce与WordCount示例分析
- mapreduce框架设计思想,wordcount程序原理与实现
- mapreduce-wordcount例子
- MPAndroidChart 教程:数据格式器 ValueFormatter(五)
- 机器学习&深度学习学习资源
- JAVA基础 day25 网络编程 IP类 UDP,TCP传输学习 简易聊天工具 TCP并发学习
- 搞懂JAVA集合类--线程安全问题(六)
- MPAndroidChart 教程:图表的具体设置 Specific chart settings(六)
- MapReduce 基本模版与WordCount代码
- map集合遍历
- Acticle 18:jquery基础(基本选择器)具体实例
- POJ 3349 - 数值哈希(这辈子只服数据量)
- 验证整数、小数、实数、有效位小数最简单JavaScript正则表达式
- Hiveserver2 beeline error java.io.FileNotFoundException: minlog-1.2.jar
- 在windows下将Mongodb注册为服务,报错
- MPAndroidchart 教程:图例 Legend(七)
- Two Sum (LeetCode OJ)