MapReduce购物推荐引擎

来源:互联网 发布:财务管理app软件 编辑:程序博客网 时间:2024/05/18 12:43

源码:

package com.recommend;

import com.recommend.util.TextTup;
import com.recommend.util.TextTupGroupComparator;
import com.recommend.util.TextTupSortComparator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.Iterator;

// Reading old code is like talking with your past self: you can see how you were
// thinking at the time. Unlike prose, code lets you see your own careful designs.
// Code and writing alone are my treat to myself. 2017-11-4

/**
 * Item-based shopping recommendation pipeline built from seven chained MapReduce jobs.
 *
 * <p>The raw input is a purchase list with space-separated lines {@code user good amount}.
 * The jobs are wired together in {@link #run(String[])}:
 * <ul>
 *   <li>job0 groups purchases by good ({@code good -> user:amount ...});</li>
 *   <li>job1 groups purchases by user ({@code user -> good ...});</li>
 *   <li>job2 counts how often two goods appear in the same user's basket;</li>
 *   <li>job3 folds those counts into one row per good ({@code good -> other:count ...});</li>
 *   <li>job4 joins the job3 rows with the job0 rows on the good id;</li>
 *   <li>job5 multiplies co-occurrence counts by purchase amounts and sums them per id pair;</li>
 *   <li>job6 keeps only the pairs that do not also occur in the raw purchase list.</li>
 * </ul>
 */
public class GetRecommend extends Configured implements Tool {

    /** Pause between two polls of the job controller while the pipeline is running. */
    private static final long POLL_INTERVAL_MS = 500L;

    /**
     * Command-line entry point; the process exit code is the value returned by {@link #run}.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new GetRecommend(), args));
    }

    /** Concatenates every value, each one followed by a tab (so the result ends with a tab). */
    private static String joinWithTrailingTabs(Iterable<Text> values) {
        StringBuilder joined = new StringBuilder();
        for (Text item : values) {
            joined.append(item.toString()).append('\t');
        }
        return joined.toString();
    }

    /** Concatenates {@code fields[1..]}, each one followed by a tab; {@code fields[0]} is skipped. */
    private static String joinTailWithTrailingTabs(String[] fields) {
        StringBuilder joined = new StringBuilder();
        for (int i = 1; i < fields.length; i++) {
            joined.append(fields[i]).append('\t');
        }
        return joined.toString();
    }

    // ---------------------------------------------------------------------------------------
    // job0: group the purchase list by good.
    // ---------------------------------------------------------------------------------------

    /** Maps a {@code user good amount} line to {@code good -> user:amount}. */
    public static class PlinksMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            String user = fields[0];
            String good = fields[1];
            // Parsed (not just copied) so that a non-numeric amount fails fast here.
            int amount = Integer.parseInt(fields[2]);
            outKey.set(good);
            outValue.set(user + ":" + amount);
            context.write(outKey, outValue);
        }
    }

    /** Collapses all {@code user:amount} entries of one good into a single tab-separated row. */
    public static class PlinksReducer extends Reducer<Text, Text, Text, Text> {
        private final Text row = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            row.set(joinWithTrailingTabs(values));
            context.write(key, row);
        }
    }

    // ---------------------------------------------------------------------------------------
    // job1: group the purchase list by user.
    // ---------------------------------------------------------------------------------------

    /**
     * Maps a {@code user good amount} line to {@code user -> good}.
     *
     * <p>The amount column is ignored for now; it is reserved for a later refinement of the
     * recommendation accuracy.
     */
    public static class UserOfGoodsMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            outKey.set(fields[0]);
            outValue.set(fields[1]);
            context.write(outKey, outValue);
        }
    }

    /** Collapses all goods bought by one user into a single tab-separated row. */
    public static class UserOfGoodsReducer extends Reducer<Text, Text, Text, Text> {
        private final Text row = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            row.set(joinWithTrailingTabs(values));
            context.write(key, row);
        }
    }

    // ---------------------------------------------------------------------------------------
    // job2: global co-occurrence counts - which goods were bought together by the same user.
    // Every co-occurrence currently weighs 1, which leaves room for a richer similarity later.
    // ---------------------------------------------------------------------------------------

    /**
     * Reads a tab-separated {@code user good good ...} row and emits {@code goodA\tgoodB -> 1}
     * for every ordered pair of goods in that row, including each good paired with itself.
     */
    public static class GoodsOfGoodsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text pair = new Text();
        private final IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            // Index 0 is the user id; goods start at index 1.
            for (int i = 1; i < fields.length; i++) {
                for (int j = 1; j < fields.length; j++) {
                    pair.set(fields[i] + "\t" + fields[j]);
                    context.write(pair, one);
                }
            }
        }
    }

    /** Sums the occurrences of each good pair. */
    public static class GoodsOfGoodsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable occurrence : values) {
                count += occurrence.get();
            }
            context.write(key, new IntWritable(count));
        }
    }

    // ---------------------------------------------------------------------------------------
    // job3: present the good-to-good relation as a matrix, one row per good.
    // ---------------------------------------------------------------------------------------

    /** Maps a {@code goodA\tgoodB\tcount} line to {@code goodA -> goodB:count}. */
    public static class GoodsOfGoodsViewMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            outKey.set(fields[0].trim());
            outValue.set(fields[1].trim() + ":" + fields[2]);
            context.write(outKey, outValue);
        }
    }

    /** Collapses all {@code goodB:count} entries of one good into a single tab-separated row. */
    public static class GoodsOfGoodsViewReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            context.write(key, new Text(joinWithTrailingTabs(values)));
        }
    }

    // ---------------------------------------------------------------------------------------
    // job4: join the co-occurrence matrix with the purchases so they can be multiplied.
    // ---------------------------------------------------------------------------------------

    /** Re-keys a tab-separated row by its first column, tagged {@code "0"}; the rest is the value. */
    public static class ReGoodsOfGoodsMapper extends Mapper<LongWritable, Text, TextTup, Text> {
        private final TextTup outKey = new TextTup();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            outKey.set(fields[0], "0");
            outValue.set(joinTailWithTrailingTabs(fields));
            context.write(outKey, outValue);
        }
    }

    /** Re-keys a tab-separated row by its first column, tagged {@code "1"}; the rest is the value. */
    public static class ReUserOfGoodsMapper extends Mapper<LongWritable, Text, TextTup, Text> {
        private final TextTup outKey = new TextTup();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            outKey.set(fields[0], "1");
            outValue.set(joinTailWithTrailingTabs(fields));
            context.write(outKey, outValue);
        }
    }

    /**
     * Pairs the first value of each group with every following value and writes
     * {@code first,following} lines.
     *
     * <p>NOTE(review): this presumably relies on the job's sort/grouping comparators delivering
     * the {@code "0"}-tagged row first within a group - confirm against
     * {@code TextTupSortComparator} and {@code TextTupGroupComparator}.
     */
    public static class CounterReducer extends Reducer<TextTup, Text, NullWritable, Text> {
        private final Text outValue = new Text();

        @Override
        protected void reduce(TextTup key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            NullWritable nothing = NullWritable.get();
            Iterator<Text> rows = values.iterator();
            // Copied on purpose: the reference must survive the following next() calls.
            Text first = new Text(rows.next());
            while (rows.hasNext()) {
                Text following = rows.next();
                outValue.set(first + "," + following);
                context.write(nothing, outValue);
            }
        }
    }

    // ---------------------------------------------------------------------------------------
    // job5: compute the recommendation score of each id pair.
    // ---------------------------------------------------------------------------------------

    /**
     * Reads a {@code left,right} line where both sides are tab-separated {@code id:number}
     * lists, and emits the product of the two numbers for every left/right combination.
     * The output key is the two ids joined by a tab, numerically smaller id first.
     */
    public static class RideMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text outKey = new Text();
        private final IntWritable outValue = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] sides = value.toString().split(",");
            String[] leftEntries = sides[0].split("\t");
            String[] rightEntries = sides[1].split("\t");
            for (String leftEntry : leftEntries) {
                String[] left = leftEntry.split(":");
                for (String rightEntry : rightEntries) {
                    String[] right = rightEntry.split(":");
                    outValue.set(Integer.parseInt(left[1]) * Integer.parseInt(right[1]));
                    // Integer.compare instead of subtraction: a difference can overflow.
                    if (Integer.compare(Integer.parseInt(left[0]), Integer.parseInt(right[0])) > 0) {
                        outKey.set(right[0] + "\t" + left[0]);
                    } else {
                        outKey.set(left[0] + "\t" + right[0]);
                    }
                    context.write(outKey, outValue);
                }
            }
        }
    }

    /** Sums the partial scores of each id pair. */
    public static class RideReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable partial : values) {
                sum += partial.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    // ---------------------------------------------------------------------------------------
    // job6: drop what has already been bought and recommend what has not.
    // ---------------------------------------------------------------------------------------

    /** Maps a space-separated raw purchase line to {@code col0\tcol1 -> col2}. */
    public static class MatrixMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            outKey.set(fields[0] + "\t" + fields[1]);
            outValue.set(fields[2]);
            context.write(outKey, outValue);
        }
    }

    /**
     * Maps a tab-separated score line to {@code col0\tcol1 -> col2}; lines with fewer than
     * three columns are skipped.
     */
    public static class RideCounterMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split("[\t]");
            if (fields.length >= 3) {
                outKey.set(fields[0] + "\t" + fields[1]);
                outValue.set(fields[2]);
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Writes a key only when it has exactly one value, i.e. when the pair was produced by just
     * one record across the two inputs of the job.
     */
    public static class RebuildReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            Iterator<Text> rows = values.iterator();
            Text only = rows.next();
            if (!rows.hasNext()) {
                context.write(key, only);
            }
        }
    }

    /**
     * Creates a job that reads one text input, writes one text output, and uses the same
     * key/value classes for both the map output and the final output.
     */
    private Job createTextJob(Configuration conf, String name, Path input, Path output,
                              Class<? extends Mapper<?, ?, ?, ?>> mapperClass,
                              Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
                              Class<?> keyClass, Class<?> valueClass) throws IOException {
        Job job = Job.getInstance(conf, name);
        job.setJarByClass(this.getClass());
        job.setMapperClass(mapperClass);
        job.setMapOutputKeyClass(keyClass);
        job.setMapOutputValueClass(valueClass);
        job.setReducerClass(reducerClass);
        job.setOutputKeyClass(keyClass);
        job.setOutputValueClass(valueClass);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, input);
        TextOutputFormat.setOutputPath(job, output);
        return job;
    }

    /** Wraps a configured job so it can be scheduled by a {@link JobControl}. */
    private static ControlledJob control(Job job) throws IOException {
        ControlledJob controlled = new ControlledJob(job.getConfiguration());
        controlled.setJob(job);
        return controlled;
    }

    /**
     * Configures the seven jobs, declares their dependencies and runs them through a
     * {@link JobControl}, printing the progress of running jobs until everything has finished.
     *
     * @param strings command-line arguments (unused; all paths are hard-coded)
     * @return {@code 0} when every job succeeded, {@code 1} when at least one job failed
     * @throws Exception if a job cannot be configured or monitoring is interrupted
     */
    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();

        // Input/output locations of every job.
        Path job0Input = new Path("F:\\hadoop\\data\\SeachGoods\\matrix");
        Path job0Output = new Path("F:\\hadoop\\result\\Recomend\\getGoodOfUsers");
        Path job1Input = new Path("F:\\hadoop\\data\\SeachGoods\\matrix");
        Path job1Output = new Path("F:\\hadoop\\result\\Recomend\\getUserOfGoods");
        Path job2Input = new Path("F:\\hadoop\\result\\Recomend\\getUserOfGoods\\part-r-00000");
        Path job2Output = new Path("F:\\hadoop\\result\\Recomend\\GetGoodsOfGoodsLink");
        Path job3Input = new Path("F:\\hadoop\\result\\Recomend\\GetGoodsOfGoodsLink\\part-r-00000");
        Path job3Output = new Path("F:\\hadoop\\result\\Recomend\\GetGoodsOfGoodsLinkView");
        Path job4InputA = new Path("F:\\hadoop\\result\\Recomend\\GetGoodsOfGoodsLinkView\\part-r-00000");
        Path job4InputB = new Path("F:\\hadoop\\result\\Recomend\\getGoodOfUsers\\part-r-00000");
        Path job4Output = new Path("F:\\hadoop\\result\\Recomend\\Counter");
        Path job5Input = new Path("F:\\hadoop\\result\\Recomend\\Counter\\part-r-00000");
        Path job5Output = new Path("F:\\hadoop\\result\\Recomend\\RideCounter");
        Path job6InputA = new Path("F:\\hadoop\\data\\SeachGoods\\matrix");
        Path job6InputB = new Path("F:\\hadoop\\result\\Recomend\\RideCounter\\part-r-00000");
        Path job6Output = new Path("F:\\hadoop\\result\\Recomend\\Recommend");

        // Single-input jobs.
        Job job0 = createTextJob(conf, "job0", job0Input, job0Output,
                PlinksMapper.class, PlinksReducer.class, Text.class, Text.class);
        Job job1 = createTextJob(conf, "job1", job1Input, job1Output,
                UserOfGoodsMapper.class, UserOfGoodsReducer.class, Text.class, Text.class);
        Job job2 = createTextJob(conf, "job2", job2Input, job2Output,
                GoodsOfGoodsMapper.class, GoodsOfGoodsReducer.class, Text.class, IntWritable.class);
        Job job3 = createTextJob(conf, "job3", job3Input, job3Output,
                GoodsOfGoodsViewMapper.class, GoodsOfGoodsViewReducer.class, Text.class, Text.class);

        // job4 reads two inputs, each with its own mapper, and joins them in the reducer.
        Job job4 = Job.getInstance(conf, "job4");
        job4.setJarByClass(this.getClass());
        job4.setMapOutputKeyClass(TextTup.class);
        job4.setMapOutputValueClass(Text.class);
        MultipleInputs.addInputPath(job4, job4InputA, TextInputFormat.class, ReGoodsOfGoodsMapper.class);
        MultipleInputs.addInputPath(job4, job4InputB, TextInputFormat.class, ReUserOfGoodsMapper.class);
        job4.setReducerClass(CounterReducer.class);
        job4.setOutputKeyClass(NullWritable.class);
        job4.setOutputValueClass(Text.class);
        job4.setOutputFormatClass(TextOutputFormat.class);
        job4.setGroupingComparatorClass(TextTupGroupComparator.class);
        job4.setSortComparatorClass(TextTupSortComparator.class);
        TextOutputFormat.setOutputPath(job4, job4Output);

        Job job5 = createTextJob(conf, "job5", job5Input, job5Output,
                RideMapper.class, RideReducer.class, Text.class, IntWritable.class);

        // job6 reads the raw purchases and the scores, each with its own mapper.
        Job job6 = Job.getInstance(conf, "job6");
        job6.setJarByClass(this.getClass());
        job6.setMapOutputKeyClass(Text.class);
        job6.setMapOutputValueClass(Text.class);
        MultipleInputs.addInputPath(job6, job6InputA, TextInputFormat.class, MatrixMapper.class);
        MultipleInputs.addInputPath(job6, job6InputB, TextInputFormat.class, RideCounterMapper.class);
        job6.setReducerClass(RebuildReducer.class);
        job6.setOutputKeyClass(Text.class);
        job6.setOutputValueClass(Text.class);
        TextOutputFormat.setOutputPath(job6, job6Output);

        // Execution-order control.
        ControlledJob controlledJob0 = control(job0);
        ControlledJob controlledJob1 = control(job1);
        ControlledJob controlledJob2 = control(job2);
        ControlledJob controlledJob3 = control(job3);
        ControlledJob controlledJob4 = control(job4);
        ControlledJob controlledJob5 = control(job5);
        ControlledJob controlledJob6 = control(job6);

        // Dependencies: 1 -> 2 -> 3 -> 4 -> 5 -> 6, and job4 additionally needs job0.
        controlledJob2.addDependingJob(controlledJob1);
        controlledJob3.addDependingJob(controlledJob2);
        controlledJob4.addDependingJob(controlledJob3);
        controlledJob4.addDependingJob(controlledJob0);
        controlledJob5.addDependingJob(controlledJob4);
        controlledJob6.addDependingJob(controlledJob5);

        JobControl jobControl = new JobControl(this.getClass().getSimpleName());
        jobControl.addJob(controlledJob0);
        jobControl.addJob(controlledJob1);
        jobControl.addJob(controlledJob2);
        jobControl.addJob(controlledJob3);
        jobControl.addJob(controlledJob4);
        jobControl.addJob(controlledJob5);
        jobControl.addJob(controlledJob6);

        new Thread(jobControl).start();
        try {
            while (!jobControl.allFinished()) {
                for (ControlledJob running : jobControl.getRunningJobList()) {
                    running.getJob().monitorAndPrintJob();
                }
                // Back off instead of spinning while jobs are still waiting on dependencies.
                Thread.sleep(POLL_INTERVAL_MS);
            }
            // Report failure to the caller instead of always claiming success.
            return jobControl.getFailedJobList().isEmpty() ? 0 : 1;
        } finally {
            // Ask the controller thread to terminate; otherwise it would keep polling forever.
            jobControl.stop();
        }
    }
}

原创粉丝点击