MapReduce Inverted Index Code

An inverted index arises from the practical need to look up records by attribute value. Each entry in such an index contains an attribute value together with the addresses of all records that have that value. Because records are located from attribute values, rather than attribute values being determined from records, the structure is called an inverted index. A file equipped with an inverted index is called an inverted index file, or inverted file for short.
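As a small concrete illustration (the file names and contents are hypothetical), suppose HDFS holds two text files:

    a.txt: hello world hello
    b.txt: hello hadoop

An inverted index for these files maps each word back to the files that contain it, along with an occurrence count:

    hadoop -> b.txt (1)
    hello  -> a.txt (2); b.txt (1)
    world  -> a.txt (1)

The MapReduce job below builds exactly this kind of mapping, with words as keys and file/count pairs as values.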

MapReduce implementation (Java):

package invertedIndex;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class InvertedIndex {

    // Mapper: for every word in a line, emit ("word\tfilePath", "1").
    public static class IIMap extends Mapper<LongWritable, Text, Text, Text> {
        private Text k = new Text();
        private Text v = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The input split tells us which file the current line came from.
            FileSplit split = (FileSplit) context.getInputSplit();
            String[] strs = value.toString().split(" ");
            for (String str : strs) {
                k.set(str + "\t" + split.getPath().toString());
                v.set("1");
                context.write(k, v);
            }
        }
    }

    // Combiner: sum the counts per (word, file) pair and move the file path
    // from the key into the value, so the key becomes just the word.
    // Caveat: rewriting the key in a combiner is non-standard. The framework
    // treats combiners as an optional optimization, so this job's correctness
    // depends on the combiner actually running.
    public static class IICombiner extends Reducer<Text, Text, Text, Text> {
        private Text k = new Text();
        private Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String[] strs = key.toString().split("\t");
            long sum = 0;
            for (Text t : values) {
                sum += Long.parseLong(t.toString());
            }
            k.set(strs[0]);               // the word
            v.set(strs[1] + "\t" + sum);  // "filePath\tcount"
            context.write(k, v);
        }
    }

    // Reducer: concatenate all "filePath\tcount" entries for each word.
    public static class IIReduce extends Reducer<Text, Text, Text, Text> {
        private Text v = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder sb = new StringBuilder();
            for (Text t : values) {
                sb.append(t.toString()).append(";");
            }
            v.set(sb.toString());
            context.write(key, v);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://192.168.8.8:9000/");

        Job job = Job.getInstance(config);
        job.setJarByClass(InvertedIndex.class);
        job.setMapperClass(IIMap.class);
        job.setCombinerClass(IICombiner.class);
        job.setReducerClass(IIReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path("/root/"));
        FileOutputFormat.setOutputPath(job, new Path("/result/combiner"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
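To try the job out (the jar name below is hypothetical; the NameNode address and paths come from the code above), package the class and submit it with the standard hadoop jar command:

    hadoop jar invertedindex.jar invertedIndex.InvertedIndex

FileOutputFormat refuses to write into an existing directory, so remove /result/combiner between runs (hadoop fs -rm -r /result/combiner). For the two sample files shown earlier, the output contains one line per word, for example:

    hello	hdfs://192.168.8.8:9000/root/a.txt	2;hdfs://192.168.8.8:9000/root/b.txt	1;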