使用MapReduce开发WordCount应用程序
来源:互联网 发布:软件汉化教程 编辑:程序博客网 时间:2024/06/05 06:15
package com.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * WordCount application implemented with MapReduce.
 *
 * <p>Reads text from the input path, splits each line into whitespace-separated
 * words and writes one {@code word -> count} record per distinct word to the
 * output path.
 */
public class WordCountApp {

    /**
     * Map phase: reads the input line by line and emits {@code (word, 1)} for
     * every word on the line.
     *
     * <p>Type parameters follow {@code Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>}.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count emitted for every single word occurrence. */
        private final LongWritable one = new LongWritable(1);

        /** Reused output key, so that no new Text is allocated per token. */
        private final Text word = new Text();

        /**
         * Tokenizes one line of input and emits each token with a count of one.
         *
         * @param key     input key supplied by the input format (unused here)
         * @param value   one line of input text
         * @param context context used to emit the map output
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Tokenize on any run of whitespace. Splitting on a single space would
            // produce empty "words" for consecutive/leading spaces and would not
            // split on tabs.
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                // Emit the intermediate result through the context.
                context.write(word, one);
            }
        }
    }

    /**
     * Reduce phase: merges all counts emitted for the same word into a total.
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /** Reused output value, so that no new LongWritable is allocated per key. */
        private final LongWritable total = new LongWritable();

        /**
         * Sums all counts for {@code key} and emits {@code (key, sum)}.
         *
         * @param key     the word
         * @param values  all counts emitted for that word
         * @param context context used to emit the final result
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                // Accumulate the number of occurrences of the key.
                sum += value.get();
            }
            // Emit the final count for this word.
            total.set(sum);
            context.write(key, total);
        }
    }

    /**
     * Driver: assembles all the information of the MapReduce job and runs it.
     *
     * <p>Any existing output path is deleted recursively before the job starts.
     * The JVM exits with status 0 if the job succeeds, 1 if it fails and 2 if
     * the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCountApp <input path> <output path>");
            System.exit(2);
        }

        // Create the Configuration.
        Configuration configuration = new Configuration();

        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        // Clean up an already existing output directory. The file system is
        // resolved from the output path itself, so a fully qualified URI
        // (e.g. hdfs://host:port/...) works even when it differs from the
        // default file system. The instance is deliberately not closed here
        // because Hadoop may hand out a shared cached instance that the job
        // submission below still uses.
        FileSystem fileSystem = outputPath.getFileSystem(configuration);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
            System.out.println("output file exists, but is has deleted");
        }

        // Create the Job.
        Job job = Job.getInstance(configuration, "wordcount");

        // Set the class used to locate the job jar.
        job.setJarByClass(WordCountApp.class);

        // Set the input path of the job.
        FileInputFormat.setInputPaths(job, inputPath);

        // Map-related settings.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Summation is associative and commutative and the reducer's input and
        // output types are identical, so the reducer doubles as a combiner to
        // shrink the data shuffled between map and reduce.
        job.setCombinerClass(MyReducer.class);

        // Reduce-related settings.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Set the output path of the job.
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /*
     * Build:   mvn clean package -DskipTests
     * Upload:  scp target/hadoop-train-1.0.jar hadoop@hadoop000:~/lib
     * Run:     hadoop jar /home/hadoop/lib/hadoop-train-1.0.jar com.hadoop.mapreduce.WordCountApp
     *              hdfs://hadoop000:8020/hello.txt hdfs://hadoop000:8020/output/wc
     *
     * Optional wrapper script: create a file named wc_shell.sh (e.g. vi wc_shell.sh) containing
     *   hadoop fs -rm -r /output/wc    # delete the old output first
     *   hadoop jar /home/hadoop/lib/hadoop-train-1.0.jar com.hadoop.mapreduce.WordCountApp
     *       hdfs://hadoop000:8020/hello.txt hdfs://hadoop000:8020/output/wc
     * Make it executable: chmod u+x wc_shell.sh
     */
}
阅读全文
0 0
- 使用MapReduce开发WordCount应用程序
- Mapreduce wordCount
- MapReduce WordCount
- MapReduce WordCount
- wordCount MapReduce
- 4 开发MapReduce应用程序
- 使用python实现MapReduce的wordcount实例
- MapReduce基础开发之一词汇统计和排序(wordcount)
- 在eclipse上搭建mapreduce开发环境及运行wordcount
- Hadoop之MapReduce的程序开发(二)---WordCount
- mapreduce-wordcount例子
- MapReduce例子1--wordcount
- mapreduce-从wordcount开始
- mapreduce WordCount 学习笔记
- mapreduce编程:wordcount
- MapReduce编程之WordCount
- MapReduce——wordcount
- mapreduce wordcount案例
- python3 os.path.realpath(__file__) 和 os.path.cwd() 方法的区别
- Hibernate:doInHibernate
- ajax之async属性
- 关于web项目sessionID欺骗的问题
- 最小二乘法与最优线性滤波
- 使用MapReduce开发WordCount应用程序
- maven实用
- ARM处理器模式切换(含MRS,MSR指令)
- Java的输入输出
- 数据院科技大数据研究中心发布半世纪全球顶级学者迁移图(附视频、亿级学术信息)
- 报名 | IBM苏中:从深蓝到AlphaGo,从大数据到认知商业
- 新智能安防:人脸识别技术与应用系统 | 清华大学王生进
- python
- 阅读笔记:一种用于深度神经网络压缩的滤波器级别的剪枝算法ThiNet