MapReduce 文档倒排索引例程
来源:互联网 发布:淘宝一元拍在哪里 编辑:程序博客网 时间:2024/05/16 05:32
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class InvertedIndex {
public static class InvertedIndexMapper extends Mapper<Object,Text,Text,Text>
{
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException
{
FileSplit fileSplit = (FileSplit)context.getInputSplit();
String fileName = fileSplit.getPath().getName();
Text word = new Text();
Text fileName_lineOffset = new Text(fileName+"#"+key.toString());
StringTokenizer itr = new StringTokenizer(value.toString());
for(;itr.hasMoreTokens();)
{
word.set(itr.nextToken());
context.write(word, fileName_lineOffset);
}
}
}
public static class InvertedIndexReducer extends Reducer<Text,Text,Text,Text>
{
public void reduce(Text key , Iterable<Text>values,Context context)
throws IOException,InterruptedException
{
Iterator<Text> it = values.iterator();
StringBuilder all = new StringBuilder();
if(it.hasNext()) all.append(it.next().toString());
for(;it.hasNext();)
{
all.append(";");
all.append(it.next().toString());
}
context.write(key, new Text(all.toString()));
}
}
public static void main(String[] args)
{
try
{
Configuration conf = new Configuration();
Job job = new Job(conf,"invertindex");
job.setJarByClass(InvertedIndex.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(InvertedIndexMapper.class);
job.setReducerClass(InvertedIndexReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
}
catch(Exception e)
{
e.printStackTrace();
}
}
}
import java.util.StringTokenizer;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class InvertedIndex {
public static class InvertedIndexMapper extends Mapper<Object,Text,Text,Text>
{
public void map(Object key,Text value,Context context)
throws IOException,InterruptedException
{
FileSplit fileSplit = (FileSplit)context.getInputSplit();
String fileName = fileSplit.getPath().getName();
Text word = new Text();
Text fileName_lineOffset = new Text(fileName+"#"+key.toString());
StringTokenizer itr = new StringTokenizer(value.toString());
for(;itr.hasMoreTokens();)
{
word.set(itr.nextToken());
context.write(word, fileName_lineOffset);
}
}
}
public static class InvertedIndexReducer extends Reducer<Text,Text,Text,Text>
{
public void reduce(Text key , Iterable<Text>values,Context context)
throws IOException,InterruptedException
{
Iterator<Text> it = values.iterator();
StringBuilder all = new StringBuilder();
if(it.hasNext()) all.append(it.next().toString());
for(;it.hasNext();)
{
all.append(";");
all.append(it.next().toString());
}
context.write(key, new Text(all.toString()));
}
}
public static void main(String[] args)
{
try
{
Configuration conf = new Configuration();
Job job = new Job(conf,"invertindex");
job.setJarByClass(InvertedIndex.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(InvertedIndexMapper.class);
job.setReducerClass(InvertedIndexReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
}
catch(Exception e)
{
e.printStackTrace();
}
}
}
- mapreduce文档倒排索引例程
- MapReduce--倒排索引
- mapreduce--倒排索引
- mapreduce实现倒排索引
- MapReduce实现倒排索引
- mapreduce实现倒排索引
- MapReduce倒排索引概要
- MapReduce实战--倒排索引
- MapReduce倒排索引实现
- MapReduce实例----倒排索引
- MapReduce实现倒排索引
- MapReduce 编程之 倒排索引
- mapreduce在倒排索引中练习
- MapReduce倒排索引简单实现
- hadoop mapReduce程序模板-倒排索引
- 倒排索引和MapReduce简介
- Hadoop之MapReduce-倒排索引案例
- MapReduce编程之倒排索引
- Windows7 下memcached memadmin的安装与使用
- UITableView生活使用
- 写一函数,输入一个四位数字,要求输出这四个数字字符,但每两个数字间空格。如输入1990,应输出"1 9 9 0"。
- 开源 Android pdf 阅读器开发总结
- sqlite 数据库 对 BOOL型 数据的插入处理
- mapreduce文档倒排索引例程
- 这些人和事,埋葬了雅虎中国
- 0欧电阻的作用
- 其实,PC依然活得很好
- hibernate中取得session的两种方式
- PC真的死了吗?谁是最后的赢家?
- js自定义字典对象,键值对
- MyBatis简介
- oracle virtual index