十二、用MapReduce完成类似倒排索引的功能
来源:互联网 发布:高性能开源网络库 编辑:程序博客网 时间:2024/04/29 08:21
1)理解【倒排索引】功能
2)熟悉 MapReduce 中的 Combiner 功能
3)依据需求编码实现【倒排索引】功能,旨在加深对 MapReduce 的理解。
数据:
结果:
代码:
package com.hyhc.mr; import java.io.IOException; import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;importorg.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner; publicclass InvertedIndexMapReduce extends Configured implements Tool{//url:key1->10//url:key2->12//url1:key2->3 publicstaticclass IndexMapper extends Mapper<LongWritable,Text,Text,Text>{ private Text mapOutputKey = new Text(); private Text mapOutputValue = new Text("1"); @Override publicvoid map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String lineValue=value.toString(); String strs[]=lineValue.split("##"); String url = strs[0] ; String title = strs[1] ; String content = strs[2] ; String[] tstrs = title.split(" ") ; for(String ts : tstrs){ mapOutputKey.set(ts+","+url); context.write(mapOutputKey, mapOutputValue); } String[] cstrs = content.split(" ") ; for(String cs : cstrs){ mapOutputKey.set(cs+","+url); context.write(mapOutputKey, mapOutputValue); } } } publicstaticclass IndexCombiner extends Reducer<Text,Text,Text,Text>{ private Text combinerOutoutKey = new Text() ; private Text combinerOutputValue = new Text(); @Override publicvoid reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String keys[]=key.toString().split(","); combinerOutoutKey.set(keys[0]); intsum = 0 ; for(Text value : values){ sum += Integer.parseInt(value.toString()); } combinerOutputValue.set(keys[1]+"->"+sum); context.write(combinerOutoutKey, combinerOutputValue); }} publicstaticclass IndexReducer 
extends Reducer<Text,Text,Text,Text>{ private Text outputKey = new Text() ; private Text splitline = new Text("----------------"); private Text splitline1 = new Text("----------------------------------------"); @Override publicvoid reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { outputKey.set("key:"+key); context.write(outputKey, null); context.write(splitline, null); for(Text value : values){ context.write(null, value); } context.write(splitline1, null); } } publicint run(String[] args) throws Exception { Configuration configuration = super.getConf() ; Job job = Job.getInstance( configuration, this.getClass().getSimpleName() ); job.setJarByClass(this.getClass()); Path inPath = new Path(args[0]) ; FileInputFormat.addInputPath(job, inPath); job.setMapperClass(IndexMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setCombinerClass(IndexCombiner.class); job.setReducerClass(IndexReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Path outPath = new Path(args[1]); FileOutputFormat.setOutputPath(job, outPath); booleanisSuccess = job.waitForCompletion(true); returnisSuccess ? 0 : 1 ; } publicstaticvoid main(String[] args) throws Exception { Configuration configuration = new Configuration(); intstatus = ToolRunner.run(// configuration, // new InvertedIndexMapReduce(), // args ) ; System.exit(status); }}
0 0
- 十二、用MapReduce完成类似倒排索引的功能
- MapReduce--倒排索引
- mapreduce--倒排索引
- MapReduce 倒排索引的实现
- MapReduce功能实现十---倒排索引(Inverted Index)
- mapreduce实现倒排索引
- MapReduce实现倒排索引
- mapreduce实现倒排索引
- MapReduce倒排索引概要
- MapReduce实战--倒排索引
- MapReduce倒排索引实现
- MapReduce实例----倒排索引
- MapReduce实现倒排索引
- MapReduce编程之倒排索引的实现
- mapreduce实现搜索引擎简单的倒排索引
- 使用MapReduce实现简单的倒排索引
- 使用MapReduce实现复杂的倒排索引
- 倒排索引的分布式实现(MapReduce程序)
- Activity与Fragment的传参
- 125. Valid Palindrome \ 459. Repeated Substring Pattern
- 【GDKOI2017模拟1.21】Rhyme
- android studio签名打包方法
- 一个由进程内存布局异常引起的问题
- 十二、用MapReduce完成类似倒排索引的功能
- 将对象转换成json格式
- J2EE简介
- my.ini文件
- 两种关于ajax跨域请求解决方案
- 关于hadoop与jstl冲突的jar包问题
- .net EF框架 MySql实现实例
- Java 减少GC开销的几个措施
- [Mysql] 如何实现按距离排序、范围查找