mapreduce编程练习(二)倒排索引 Combiner的使用以及练习

来源:互联网 发布:c语言代码基础题目 编辑:程序博客网 时间:2024/05/22 06:06

问题一:请利用Combiner的方式:根据图示内容编写mapreduce程序


示例程序

package com.greate.learn;

import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Inverted-index MapReduce job.
 *
 * <p>The mapper emits {@code ("word : fileName", "1")}. The combiner sums the
 * counts per (word, file) pair and re-keys the stream to
 * {@code (word, "fileName:count")}. The reducer joins all per-file postings of
 * one word with {@code ';'}, producing lines like
 * {@code word  fileA:2;fileB:1}.
 */
public class GetFile_Statistics extends Configured implements Tool {

    /**
     * Separator between the word and the file name in the intermediate key.
     * BUG FIX: the original mapper wrote " :  " (two trailing spaces) while
     * the combiner split on " : " (one space), so every emitted file name
     * carried a stray leading space. One shared constant keeps both sides
     * consistent.
     */
    private static final String WORD_FILE_SEP = " : ";

    /** Tokenizes each line and emits ("word : fileName", "1"). */
    public static class CountMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text word = new Text();
        private final Text one = new Text("1");

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // The source file name is part of the key so the combiner can
            // aggregate counts per (word, file) pair.
            String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken() + WORD_FILE_SEP + fileName);
                context.write(word, one);
            }
        }
    }

    /**
     * Sums counts per (word, file) pair and re-keys to (word, "file:count").
     *
     * <p>NOTE(review): this combiner changes the key/value format, yet Hadoop
     * treats combiners as an optional optimization that may run zero or more
     * times. If the framework skips it, the reducer receives the raw mapper
     * format and the output is wrong. Kept to match the exercise statement;
     * a production job should fold this re-keying into the mapper or reducer.
     */
    public static class Combiner extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text v : values) {
                sum += Integer.parseInt(v.toString());
            }
            // limit=2 keeps any later occurrence of the separator inside the
            // file name intact.
            String[] parts = key.toString().split(WORD_FILE_SEP, 2);
            context.write(new Text(parts[0]), new Text(parts[1] + ":" + sum));
        }
    }

    /** Joins all "file:count" postings of one word with ';'. */
    public static class CountReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // BUG FIX: the original accumulated into *static* fields shared by
            // every reduce() invocation in the JVM. It only worked because
            // consecutive keys happen to differ; with JVM reuse or equal
            // adjacent keys it would leak state between groups. A local
            // builder per group is both correct and simpler.
            StringBuilder postings = new StringBuilder();
            for (Text value : values) {
                if (postings.length() > 0) {
                    postings.append(';');
                }
                postings.append(value.toString());
            }
            context.write(key, new Text(postings.toString()));
        }
    }

    static FileSystem fs = null;
    static Configuration conf = null;

    /** Connects to the local HDFS instance. Must be called before run(). */
    public static void init() throws Exception {
        conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000/");
        fs = FileSystem.get(new URI("hdfs://localhost:9000/"), conf, "hadoop");
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "WordCount");
        job.setJarByClass(GetFile_Statistics.class);
        job.setMapperClass(CountMapper.class);
        job.setCombinerClass(Combiner.class);
        job.setReducerClass(CountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path in = new Path("/GetFile_Statistics/input");
        if (fs.exists(in)) {
            FileInputFormat.addInputPath(job, in);
        } else {
            // BUG FIX: the original still submitted the job without any input
            // path, failing later with a confusing framework error. Fail fast.
            System.out.println("文件夹不存在,需要创建!");
            return 1;
        }

        Path os = new Path("/GetFile_Statistics/output");
        if (fs.exists(os)) {
            // The output directory must not pre-exist; delete and recreate.
            System.out.println("文件夹存在!不再创建!");
            fs.delete(os, true);
        }
        FileOutputFormat.setOutputPath(job, os);
        return job.waitForCompletion(false) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        init();
        int res = ToolRunner.run(new GetFile_Statistics(), args);
        System.exit(res);
    }
}
问题二:现有一批电话通信清单,记录了用户A拨打某些特殊号码(如120,10086,13800138000等)的记录。需要做一个统计结果,记录拨打给用户B的所有用户A。


示例程序


package com.greate.learn;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Call-record statistics job.
 *
 * <p>Each input line is "callerA calleeB" (space separated). The output maps
 * every callee B to the list of callers A that dialed it, e.g.
 * {@code 10086    A1 | A2 | A3 | }.
 */
public class PhoneNumber_Statistic extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        // BUG FIX: the original discarded ToolRunner's exit status; propagate
        // it so shell scripts can detect a failed job.
        System.exit(ToolRunner.run(new PhoneNumber_Statistic(), args));
    }

    @Override
    public int run(String[] arg0) throws Exception {
        Configuration conf = getConf();
        // Job.getInstance(conf) replaces the deprecated new Job(conf) ctor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(getClass());
        FileInputFormat.setInputPaths(job, new Path("/PhoneNumber_Statistics/input/"));
        FileOutputFormat.setOutputPath(job, new Path("/PhoneNumber_Statistics/output/"));
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(numberMap.class);
        job.setReducerClass(numberReduce.class);
        // BUG FIX: the original always returned 0 even when the job failed.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}

/** Inverts "callerA calleeB" lines into (calleeB, callerA) pairs. */
class numberMap extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(" ");
        // BUG FIX: guard against malformed lines; the original threw
        // ArrayIndexOutOfBoundsException and killed the task on bad input.
        if (fields.length < 2) {
            return;
        }
        // fields[0] = caller (A), fields[1] = callee (B); key on the callee
        // so the reducer groups all callers of one number together.
        context.write(new Text(fields[1]), new Text(fields[0]));
    }
}

/** Concatenates every caller of one callee, separated by " | ". */
class numberReduce extends Reducer<Text, Text, Text, Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // BUG FIX: O(n^2) "out += ..." concatenation replaced by a builder.
        // The trailing " | " is kept to preserve the original output format.
        StringBuilder callers = new StringBuilder();
        for (Text caller : values) {
            callers.append(caller.toString()).append(" | ");
        }
        context.write(key, new Text(callers.toString()));
    }
}




1 0
原创粉丝点击