贝叶斯算法的 MapReduce 实现
来源:互联网 发布:软件的设计阶段 编辑:程序博客网 时间:2024/04/28 22:30
/**
 * MapReduce job that estimates, from a labelled sample, the conditional
 * probability of every attribute value given the class label.
 *
 * <p>Input: tab-separated lines of the form
 * {@code attr0 <TAB> attr1 <TAB> ... <TAB> class}; the last field is the class label.
 *
 * <p>Output: one line per (attribute index, class, attribute value):
 * {@code p(<index>=<value>|class=<label>) <TAB> <probability>}.
 *
 * <p>Command: {@code hadoop jar recommend_cf.jar com.funshion.machine.bayes.Bayes2
 * /dw/logs/user/xincl/bayes.txt /dw/logs/recommend/result/machine/Bayes2}
 *
 * @author clxin
 */
public class Bayes2 extends Configured implements Tool {

    /**
     * Emits one record per attribute of every input line.
     *
     * <p>The key is built from {@code "<attribute index>\t<class label>"} and the
     * attribute value; the value repeats the attribute value.
     * NOTE(review): StringSecondSortAsce is declared elsewhere; presumably its
     * first component drives partitioning/grouping and its second the sort order.
     */
    public static class BayesMapper extends MapReduceBase implements
            Mapper<LongWritable, Text, StringSecondSortAsce, Text> {
        // Reused across calls to avoid allocating a new writable per record.
        private final StringSecondSortAsce tKey = new StringSecondSortAsce();
        private final Text tValue = new Text();

        /**
         * @param key    input key supplied by the input format (unused)
         * @param value  one tab-separated input line
         * @param output collector receiving one record per attribute
         * @param report progress reporter (unused)
         * @throws IOException if the collector fails
         */
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<StringSecondSortAsce, Text> output,
                Reporter report) throws IOException {
            String[] fields = value.toString().split("\t");
            int labelIndex = fields.length - 1;
            // Lines with fewer than two fields have no attribute and emit nothing.
            for (int i = 0; i < labelIndex; i++) {
                tKey.set(i + "\t" + fields[labelIndex], fields[i]);
                tValue.set(fields[i]);
                output.collect(tKey, tValue);
            }
        }
    }

    /**
     * Receives every attribute value observed for one (attribute index, class)
     * group and emits the relative frequency of each distinct value.
     */
    public static class BayesReducer extends MapReduceBase implements
            Reducer<StringSecondSortAsce, Text, Text, Text> {
        // Reused across calls to avoid allocating a new writable per record.
        private final Text tKey = new Text();
        private final Text tValue = new Text();

        /**
         * @param key    group key; {@code getFirst()} is split on tab into
         *               attribute index and class label
         * @param values attribute values of the group
         * @param output collector receiving {@code p(index=value|class=label)}
         *               and its probability
         * @param report progress reporter (unused)
         * @throws IOException if the collector fails
         */
        @Override
        public void reduce(StringSecondSortAsce key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter report)
                throws IOException {
            // Count by value instead of by adjacent run, so the result stays
            // correct even if equal values do not arrive consecutively.
            Map<String, Integer> valueCounts = new HashMap<String, Integer>();
            int total = 0;
            while (values.hasNext()) {
                String attributeValue = values.next().toString();
                Integer seen = valueCounts.get(attributeValue);
                valueCounts.put(attributeValue, seen == null ? 1 : seen + 1);
                total++;
            }
            if (total == 0) {
                return;
            }

            // parts[0] = attribute index, parts[1] = class label (as built by the mapper).
            String[] parts = key.getFirst().split("\t");
            for (Entry<String, Integer> entry : valueCounts.entrySet()) {
                tKey.set("p(" + parts[0] + "=" + entry.getKey() + "|" + "class=" + parts[1]
                        + ")");
                tValue.set(String.valueOf((double) entry.getValue() / total));
                output.collect(tKey, tValue);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} input path, {@code args[1]} output path
     * @return 0 on success, -1 when the arguments are missing
     * @throws Exception if job submission or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: Bayes2 <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        Configuration conf = getConf();
        JobConf jobConf = new JobConf(conf, Bayes2.class);
        jobConf.setJobName("Bayes2");
        jobConf.setNumReduceTasks(1);
        jobConf.setMapOutputKeyClass(StringSecondSortAsce.class);
        jobConf.setMapOutputValueClass(Text.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapperClass(BayesMapper.class);
        jobConf.setReducerClass(BayesReducer.class);
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);
        // Custom partitioner and grouping comparator are declared elsewhere in the project.
        jobConf.setPartitionerClass(FirstPartitioner.class);
        jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
        FileInputFormat.addInputPath(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Command-line entry point; exits with the code returned by {@link #run}.
     *
     * @param args generic Hadoop options followed by input and output paths
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Bayes2(), args);
        System.exit(exitCode);
    }
}
* 找到抽样用户的特征,得到每个特征的概率
* 输入:属性1 属性2 属性3 属性4 类别
* 命令:hadoop jar recommend_cf.jar com.funshion.machine.bayes.Bayes2 /dw/logs/user/xincl/bayes.txt /dw/logs/recommend/result/machine/Bayes2
* 输出:
* @author clxin
*
*/
public class Bayes2 extends Configured implements Tool {
// 输出 : mac media
public static class BayesMapper extends MapReduceBase implements
Mapper<LongWritable, Text, StringSecondSortAsce, Text> {
StringSecondSortAsce tKey = new StringSecondSortAsce();
Text tValue = new Text();
@Override
public void map(LongWritable key, Text value,
OutputCollector<StringSecondSortAsce, Text> output,
Reporter report) throws IOException {
String [] strArr = value.toString().split("\t");
for(int i=0;i<strArr.length-1;i++){
tKey.set(i+"\t"+strArr[strArr.length-1],strArr[i]);
tValue.set(strArr[i]);
output.collect(tKey, tValue);
}
}
}
// 输入:mac tag1 观影次数
public static class BayesReducer extends MapReduceBase implements
Reducer<StringSecondSortAsce, Text, Text, Text> {
int count = 0;
Text tKey = new Text();
Text tValue = new Text();
@Override
public void reduce(StringSecondSortAsce key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter report)
throws IOException {
int pCcount = 1;
int pXcount = 1;
Map xMap = new HashMap<String,String>();
String tmpValue=values.next().toString();
while(values.hasNext()){
pCcount++;
String newValue=values.next().toString();
if(!tmpValue.equals(newValue)){
xMap.put(tmpValue, pXcount);
tmpValue = newValue;
pXcount=1;
}else{
pXcount++;
}
}
xMap.put(tmpValue, pXcount);
Set<Entry<String, String>> sets = xMap.entrySet();
for (Entry<String, String> entry : sets) {
String [] xValue = key.getFirst().split("\t");
tKey.set("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
")");
Object ob = entry.getValue();
tValue.set(String.valueOf(1.0*Integer.parseInt(ob.toString())/pCcount));
output.collect(tKey, tValue);
}
}
}
public int run(String[] args) throws Exception {
Configuration conf = getConf();
JobConf jobConf = new JobConf(conf, Bayes2.class);
jobConf.setJobName("Bayes2");
jobConf.setNumReduceTasks(1);
jobConf.setMapOutputKeyClass(StringSecondSortAsce.class);
jobConf.setMapOutputValueClass(Text.class);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setMapperClass(BayesMapper.class);
jobConf.setReducerClass(BayesReducer.class);
jobConf.setInputFormat(TextInputFormat.class);
jobConf.setOutputFormat(TextOutputFormat.class);
jobConf.setPartitionerClass(FirstPartitioner.class);
jobConf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
FileInputFormat.addInputPath(jobConf, new Path(args[0]));
FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
JobClient.runJob(jobConf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Bayes2(), args);
System.exit(exitCode);
}
}
0 0
- 贝叶斯算法 mapreduce实现
- Mapreduce pairs算法 实现
- Mapreduce Strips算法 实现
- OpenCL实现MapReduce算法
- PageRank算法实现------MapReduce
- 使用MapReduce实现Pagerank算法
- 使用MapReduce实现遗传算法
- PageRank算法的MapReduce实现
- pagerank算法的MapReduce实现
- MapReduce 全排列算法实现
- 使用MapReduce实现PageRank算法
- 使用MapReduce实现knn算法
- 使用MapReduce实现Bayes算法
- MapReduce之推荐算法实现
- MapReduce编程-join算法实现
- PageRank算法及MapReduce实现
- 贝叶斯算法在mapreduce集群上的具体实现
- 算法系列:PageRank算法的MapReduce实现
- STL1——顺序容器和顺序容器适配器
- 逻辑回归(LR)算法java实现
- Java泛型中E、T、K、V等的含义
- LeetCode(237)Delete Node in a Linked List
- Python 各种集合内置方法的时间复杂度
- 贝叶斯算法 mapreduce实现
- XML(1)——邂逅XML
- 嵌入式Linux内核制作
- 关于字符串加密程序中的一些问题
- linux kernel的中断子系统之:softirq
- 初识Java,基本名字的了解(摘自网络)
- 让人自由才是真的好
- VC CFont 用法
- poll函数