贝叶斯算法的 MapReduce 实现

来源：互联网  发布：软件的设计阶段  编辑：程序博客网  时间：2024/04/28 22:30

/**
 * Training step of a naive Bayes classifier as a MapReduce job: for every
 * attribute column it estimates the conditional probability of each observed
 * attribute value given the class label.
 *
 * <p>Input: one sample per line, tab-separated, class label in the last column:
 * <pre>attr1 \t attr2 \t attr3 \t attr4 \t class</pre>
 *
 * <p>Command:
 * <pre>hadoop jar recommend_cf.jar com.funshion.machine.bayes.Bayes2 /dw/logs/user/xincl/bayes.txt /dw/logs/recommend/result/machine/Bayes2</pre>
 *
 * <p>Output: one record per (attribute index, attribute value, class label)
 * combination, with key {@code p(<index>=<value>|class=<label>)} and value
 * {@code count(value in that column and class) / count(samples in that class)}.
 *
 * @author clxin
 */

public class Bayes2 extends Configured implements Tool {

    /**
     * Emits one record per attribute of each sample.
     *
     * <p>The map output key is built from {@code "<column index>\t<class label>"}
     * as its first part and the attribute value as its second part; the map
     * output value is the attribute value itself.
     */
    public static class BayesMapper extends MapReduceBase implements
            Mapper<LongWritable, Text, StringSecondSortAsce, Text> {
        // Reused across calls to avoid allocating a key/value per record.
        private final StringSecondSortAsce tKey = new StringSecondSortAsce();
        private final Text tValue = new Text();

        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<StringSecondSortAsce, Text> output,
                Reporter report) throws IOException {
            String[] fields = value.toString().split("\t");
            // A usable sample needs at least one attribute plus the class label.
            // Note that split() drops trailing empty fields, so a line made only
            // of tabs yields an empty array; such lines are skipped here.
            if (fields.length < 2) {
                return;
            }

            int classColumn = fields.length - 1;
            String classLabel = fields[classColumn];
            for (int i = 0; i < classColumn; i++) {
                tKey.set(i + "\t" + classLabel, fields[i]);
                tValue.set(fields[i]);
                output.collect(tKey, tValue);
            }
        }
    }

    /**
     * Turns the attribute values of one (column index, class label) group into
     * conditional probabilities.
     *
     * <p>Each call receives all attribute values that share the same first key
     * part (this relies on the job's partitioner and grouping comparator
     * grouping on that first part -- both are defined outside this file).
     * Occurrences are counted per distinct value, so the result does not depend
     * on the order in which the values arrive.
     */
    public static class BayesReducer extends MapReduceBase implements
            Reducer<StringSecondSortAsce, Text, Text, Text> {
        // Reused across calls to avoid allocating a key/value per record.
        private final Text tKey = new Text();
        private final Text tValue = new Text();

        @Override
        public void reduce(StringSecondSortAsce key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter report)
                throws IOException {
            // Number of samples in this (column, class) group: the denominator.
            int classTotal = 0;
            // Occurrences of each distinct attribute value: the numerators.
            Map<String, Integer> valueCounts = new HashMap<String, Integer>();

            while (values.hasNext()) {
                String attrValue = values.next().toString();
                Integer seen = valueCounts.get(attrValue);
                valueCounts.put(attrValue, seen == null ? 1 : seen + 1);
                classTotal++;
            }
            if (classTotal == 0) {
                return; // nothing to report for an empty group
            }

            // The first key part is "<column index>\t<class label>", as built by
            // BayesMapper; it is the same for every record of the group.
            String[] keyParts = key.getFirst().split("\t");
            String attrIndex = keyParts[0];
            String classLabel = keyParts[1];

            for (Entry<String, Integer> entry : valueCounts.entrySet()) {
                tKey.set("p(" + attrIndex + "=" + entry.getKey() + "|" + "class="
                        + classLabel + ")");
                // 1.0 * forces floating-point division.
                tValue.set(String.valueOf(1.0 * entry.getValue() / classTotal));
                output.collect(tKey, tValue);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 when the job completed, 2 when the arguments are missing
     * @throws Exception if the job cannot be submitted or fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: Bayes2 <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return 2;
        }

        Configuration conf = getConf();

        JobConf jobConf = new JobConf(conf, Bayes2.class);
        jobConf.setJobName("Bayes2");
        jobConf.setNumReduceTasks(1);

        jobConf.setMapOutputKeyClass(StringSecondSortAsce.class);
        jobConf.setMapOutputValueClass(Text.class);

        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(Text.class);

        jobConf.setMapperClass(BayesMapper.class);
        jobConf.setReducerClass(BayesReducer.class);

        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        // Partition and group on the first key part so that a single reduce()
        // call sees every value of one (column index, class label) pair.
        // NOTE(review): both classes live outside this file -- confirm they
        // compare only the first part of StringSecondSortAsce.
        jobConf.setPartitionerClass(FirstPartitioner.class);
        jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class);

        FileInputFormat.addInputPath(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));

        // runJob blocks until the job finishes and throws IOException when the
        // job fails, so reaching the return statement means success.
        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner} so that generic
     * Hadoop options are parsed before {@link #run(String[])} is invoked.
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Bayes2(), args);
        System.exit(exitCode);
    }

}

0 0