mapreduce编程练习(二)倒排索引 Combiner的使用以及练习
来源:互联网 发布:c语言代码基础题目 编辑:程序博客网 时间:2024/05/22 06:06
问题一:请利用Combiner的方式,根据图示内容编写MapReduce程序
示例程序
package com.greate.learn;import java.io.IOException;import java.net.URI;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.DoubleWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class GetFile_Statistics extends Configured implements Tool {public static class CountMapper extends Mapper<LongWritable, Text, Text, Text>{private Text word = new Text();private Text one = new Text(1+"");@Overrideprotected void map(LongWritable key,Text value,Mapper<LongWritable, Text, Text, Text>.Context context)throws IOException,InterruptedException{System.out.println("line pos:" + key.toString());String line = value.toString();String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();StringTokenizer tokenizer = new StringTokenizer(line);while (tokenizer.hasMoreElements()) {word.set(tokenizer.nextToken()+" : "+fileName);context.write(word, one);}}}public static class Combiner extends Reducer<Text, Text, Text, Text>{@Overrideprotected void reduce(Text key, Iterable<Text> values,Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {int sum = 0;for(Text v : values){sum += Integer.parseInt(v.toString());}System.out.println("sum:" + sum);String[] valueString = key.toString().split(" : ");context.write(new Text(valueString[0]), new Text(valueString[1]+":" + sum));}}public static class CountReducer extends 
Reducer<Text, Text, Text, Text>{static String beforeKey = "";static String beforeValue ="";@Overrideprotected void reduce(Text key, Iterable<Text> values,Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {String key2 = key.toString();String value = "";for(Text text: values){value = text.toString();if(key2.equals(beforeKey)){beforeKey = key2;beforeValue = beforeValue +";"+value;}else{beforeKey = key2;beforeValue = value;}}context.write(new Text(beforeKey), new Text(beforeValue));}}static FileSystem fs = null;static Configuration conf=null;public static void init() throws Exception{conf = new Configuration();conf.set("fs.defaultFS", "hdfs://localhost:9000/"); fs = FileSystem.get(new URI("hdfs://localhost:9000/"),conf,"hadoop");}public int run(String[] args) throws Exception {Job job = Job.getInstance(getConf(),"WordCount");job.setJarByClass(GetFile_Statistics.class);job.setMapperClass(CountMapper.class);job.setCombinerClass(Combiner.class);job.setReducerClass(CountReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);Path in = new Path("/GetFile_Statistics/input");if(fs.exists(in)){FileInputFormat.addInputPath(job, in);}else{System.out.println("文件夹不存在,需要创建!");}Path os = new Path("/GetFile_Statistics/output");int flage = 0;if(fs.exists(os)){System.out.println("文件夹存在!不再创建!"); fs.delete(os, true); FileOutputFormat.setOutputPath(job, os); flage = job.waitForCompletion(false) ? 0:1;}else{FileOutputFormat.setOutputPath(job, os);flage = job.waitForCompletion(false) ? 0:1;}return flage;}public static void main(String[] args) throws Exception {init();int res = ToolRunner.run(new GetFile_Statistics(), args);System.exit(res);}}问题二:现有一批电话通信清单,记录了用户A拨打某些特殊号码(如120,10086,13800138000等)的记录。需要做一个统计结果,记录拨打给用户B的所有用户A。
示例程序
package com.greate.learn;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Job driver that groups call records by their second field: each output
 * line holds that field followed by every first field seen with it.
 */
public class PhoneNumber_Statistic extends Configured implements Tool {

    /** Runs the job and exits with its status code. */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new PhoneNumber_Statistic(), args));
    }

    /**
     * Configures and runs the job on the fixed input and output directories.
     *
     * @param arg0 command-line arguments (unused)
     * @return 0 when the job succeeds, 1 otherwise
     * @throws Exception if job submission fails
     */
    @Override
    public int run(String[] arg0) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJarByClass(getClass());
        FileInputFormat.setInputPaths(job, new Path("/PhoneNumber_Statistics/input/"));
        FileOutputFormat.setOutputPath(job, new Path("/PhoneNumber_Statistics/output/"));
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(numberMap.class);
        job.setReducerClass(numberReduce.class);
        // Report job failure to the caller instead of always returning success.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}

/**
 * Splits each input line on a single space and emits
 * {@code (second field, first field)}.
 */
class numberMap extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] list = value.toString().split(" ");
        if (list.length < 2) {
            // Blank or malformed line: nothing to index.
            return;
        }
        context.write(new Text(list[1]), new Text(list[0]));
    }
}

/**
 * Concatenates all values of a key, each followed by {@code " | "}, and
 * emits the result under the same key.
 */
class numberReduce extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder out = new StringBuilder();
        for (Text value : values) {
            out.append(value.toString()).append(" | ");
        }
        context.write(key, new Text(out.toString()));
    }
}
1 0
- mapreduce编程练习(二)倒排索引 Combiner的使用以及练习
- mapreduce在倒排索引中练习
- MapReduce实战练习三:倒排索引
- MapReduce练习二(单表关联,多表关联,倒排索引)
- Hadoop—MapReduce练习(数据去重、数据排序、平均成绩、倒排索引)
- Hadoop—MapReduce练习(数据去重、数据排序、平均成绩、倒排索引)
- MapReduce 练习三 文件倒排
- MapReduce编程之倒排索引的实现
- MapReduce编程之倒排索引的实现
- MapReduce 编程之 倒排索引
- MapReduce编程之倒排索引
- MapReduce编程——倒排索引
- MapReduce编程(七) 倒排索引构建
- 使用MapReduce实现简单的倒排索引
- 使用MapReduce实现复杂的倒排索引
- MapReduce--倒排索引
- mapreduce--倒排索引
- 倒排索引的分布式实现(MapReduce程序)
- 微信小程序开发—项目实战之计算器开发
- 在VMware Workstation中创建一个虚拟软盘,并在Windows XP上使用
- Android Toolbar 使用总结
- 勒索蠕虫-WanaCrypt0r(比特币病毒)防治攻略和事件全回顾
- 编译优化(未懂,一定要搞懂)
- mapreduce编程练习(二)倒排索引 Combiner的使用以及练习
- 求助hibernate单表分页查询jsp和servlet的传值代码 实在写不出来了
- java最大公约数
- 采用maven搭建springmvc+mybaits项目
- 计算机网络之ARP协议
- smssdk导入教程
- 【兴趣】QQ音乐VS网易云音乐竞品分析
- 并查集大致就这样。
- 基于FPGA的高速串行通信之GTX收发器——差分IO信号