十二、用MapReduce完成类似倒排索引的功能

来源:互联网 发布:高性能开源网络库 编辑:程序博客网 时间:2024/04/29 08:21


1)理解【倒排索引】功能
2)熟悉 MapReduce 中的 Combiner 功能
3)依据需求编码实现【倒排索引】功能,旨在加深对 MapReduce 的理解。
数据:


结果:

代码:

package com.hyhc.mr;

import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that builds an inverted-index style report: for every word
 * found in a document's title or content, it lists each URL the word occurs
 * in together with the number of occurrences.
 *
 * <p>Each input line is expected to look like {@code url##title##content},
 * with the words of the title and content separated by single spaces.
 *
 * <p>For every word the reducer writes a section of the form:
 * <pre>
 * key:WORD
 * ----------------
 * url1-&gt;10
 * url2-&gt;3
 * ----------------------------------------
 * </pre>
 *
 * <p>Data flow: the mapper emits {@code (word, "url->1")}; the combiner and
 * the reducer both sum the counts per URL. The combiner never changes the
 * key, so all postings of a word reach the same reducer and the result is
 * the same whether the framework applies the combiner zero, one or several
 * times.
 */
public class InvertedIndexMapReduce extends Configured implements Tool {

    /** Separator between the url, title and content fields of an input line. */
    private static final String FIELD_SEPARATOR = "##";

    /** Separator between a URL and its occurrence count in a posting. */
    private static final String COUNT_SEPARATOR = "->";

    /** Counter group used to report lines that were skipped. */
    private static final String COUNTER_GROUP = "InvertedIndex";

    /**
     * Sums the occurrence counts of a word per URL.
     *
     * @param values postings of the form {@code url->count}
     * @return total count per URL, ordered by URL
     * @throws IllegalStateException if a posting does not contain {@code ->}
     */
    private static Map<String, Long> sumCountsByUrl(Iterable<Text> values) {
        Map<String, Long> totals = new TreeMap<String, Long>();
        for (Text value : values) {
            String posting = value.toString();
            // Use the LAST "->" so that a URL which itself contains "->"
            // is still parsed correctly.
            int cut = posting.lastIndexOf(COUNT_SEPARATOR);
            if (cut < 0) {
                throw new IllegalStateException("Malformed posting: " + posting);
            }
            String url = posting.substring(0, cut);
            long count = Long.parseLong(posting.substring(cut + COUNT_SEPARATOR.length()));
            Long previous = totals.get(url);
            totals.put(url, previous == null ? count : previous + count);
        }
        return totals;
    }

    /**
     * Splits each input line into url, title and content and emits one
     * {@code (word, "url->1")} pair per non-empty word of title and content.
     */
    public static class IndexMapper extends Mapper<LongWritable, Text, Text, Text> {

        private final Text mapOutputKey = new Text();
        private final Text mapOutputValue = new Text();

        /**
         * @param key     input key supplied by the input format (unused)
         * @param value   one input line, {@code url##title##content}
         * @param context sink for the emitted pairs
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // limit -1 keeps trailing empty fields, so "url##title##" is
            // still treated as a well-formed line with empty content.
            String[] fields = value.toString().split(FIELD_SEPARATOR, -1);
            if (fields.length < 3) {
                // Skip the line instead of failing the whole task on it.
                context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
                return;
            }

            String url = fields[0];
            mapOutputValue.set(url + COUNT_SEPARATOR + "1");

            emitWords(fields[1], context);
            emitWords(fields[2], context);
        }

        /** Emits the current posting once for every non-empty word in {@code text}. */
        private void emitWords(String text, Context context)
                throws IOException, InterruptedException {
            for (String word : text.split(" ")) {
                if (word.isEmpty()) {
                    // Consecutive spaces produce empty tokens; they are not words.
                    continue;
                }
                mapOutputKey.set(word);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    /**
     * Pre-aggregates postings on the map side: for one word, collapses all
     * {@code url->n} values with the same URL into a single {@code url->sum}.
     * Key and value format are left unchanged, so the output can be fed
     * back into this combiner or directly into {@link IndexReducer}.
     */
    public static class IndexCombiner extends Reducer<Text, Text, Text, Text> {

        private final Text combinerOutputValue = new Text();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Map.Entry<String, Long> posting : sumCountsByUrl(values).entrySet()) {
                combinerOutputValue.set(posting.getKey() + COUNT_SEPARATOR + posting.getValue());
                context.write(key, combinerOutputValue);
            }
        }
    }

    /**
     * Writes the final report section for one word: a {@code key:WORD}
     * header, a short separator, one {@code url->count} line per URL and a
     * long closing separator.
     */
    public static class IndexReducer extends Reducer<Text, Text, Text, Text> {

        private final Text outputKey = new Text();
        private final Text outputValue = new Text();
        private final Text splitline = new Text("----------------");
        private final Text splitline1 = new Text("----------------------------------------");

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Aggregate here as well: the combiner may not have run at all,
            // and postings of one URL can arrive from several map tasks.
            Map<String, Long> totals = sumCountsByUrl(values);

            // A null key or value is passed on purpose so that only the
            // non-null side is written on the line (relies on the output
            // format tolerating null, as the default text output does).
            outputKey.set("key:" + key);
            context.write(outputKey, null);
            context.write(splitline, null);
            for (Map.Entry<String, Long> posting : totals.entrySet()) {
                outputValue.set(posting.getKey() + COUNT_SEPARATOR + posting.getValue());
                context.write(null, outputValue);
            }
            context.write(splitline1, null);
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println(
                    "Usage: " + getClass().getSimpleName() + " <input path> <output path>");
            return 2;
        }

        Configuration configuration = getConf();
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        job.setMapperClass(IndexMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setCombinerClass(IndexCombiner.class);

        job.setReducerClass(IndexReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    /** Command-line entry point; exits with the status returned by {@link #run(String[])}. */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int status = ToolRunner.run(configuration, new InvertedIndexMapReduce(), args);
        System.exit(status);
    }
}

0 0
原创粉丝点击