分类算法--贝叶斯分类法(MapReduce实现)代码实现<转>
来源:互联网 发布:陆家嘴 知乎 编辑:程序博客网 时间:2024/04/29 10:33
================================input.txt=======================================
youth high no fair no
youth high no excellent no
middle high no fair yes
senior medium no fair yes
senior low yes fair yes
senior low yes excellent no
middle low yes excellent yes
youth medium no fair no
youth low yes fair yes
senior medium yes fair yes
youth medium yes excellent yes
middle medium no excellent yes
middle high yes fair yes
senior medium no excellent no
====================================================================
package com.mahout.bayes;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mahout.test.FirstGroupingComparator;
import com.mahout.test.StringStringPairAsce;
import com.mahout.test.ItemBasePass1.FirstPartitioner;
/**
* 贝叶斯算法实现
* @author clxin
*
*/
public class Bayes extends Configured implements Tool {
/**
* 把(x1,x2,..,xn,C)转换为
* C A1 x1
* C A1 x2
* @author clxin
*/
public static class BayesMapper extends MapReduceBase implements
Mapper<LongWritable, Text, StringStringPairAsce, Text> {
private StringStringPairAsce tKey = new StringStringPairAsce();
private Text tValue = new Text();
public void map(LongWritable key, Text value,
OutputCollector<StringStringPairAsce, Text> output, Reporter arg3)
throws IOException {
String [] strArr = value.toString().split("\t");
tKey.set("age"+"\t"+strArr[strArr.length-1],strArr[0]);
tValue.set(strArr[0]);
output.collect(tKey, tValue);
tKey.set("income"+"\t"+strArr[strArr.length-1],strArr[1]);
tValue.set(strArr[1]);
output.collect(tKey, tValue);
tKey.set("student"+"\t"+strArr[strArr.length-1],strArr[2]);
tValue.set(strArr[2]);
output.collect(tKey, tValue);
tKey.set("credit_rating"+"\t"+strArr[strArr.length-1],strArr[3]);
tValue.set(strArr[3]);
output.collect(tKey, tValue);
}
}
public static class BayesReducer extends MapReduceBase implements
Reducer<StringStringPairAsce, Text, Text, Text> {
Text tKey = new Text();
Text tValue= new Text();
@Override
public void reduce(StringStringPairAsce key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
int pCcount = 1;
int pXcount = 1;
Map xMap = new HashMap<String,String>();
String tmpValue=values.next().toString();
while(values.hasNext()){
pCcount++;
String newValue=values.next().toString();
if(!tmpValue.equals(newValue)){
xMap.put(tmpValue, pXcount);
tmpValue = newValue;
pXcount=1;
}else{
pXcount++;
}
}
xMap.put(tmpValue, pXcount);
Set<Entry<String, String>> sets = xMap.entrySet();
for (Entry<String, String> entry : sets) {
tKey.set(key.getFirst() + "\t" + entry.getKey());
String [] xValue = key.getFirst().split("\t");
Object ob = entry.getValue();
tValue.set(pCcount+"\t"+ob.toString());
System.out.println("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
")="+ob.toString()+"/"+pCcount);
output.collect(tKey, tValue);
}
}
}
public static class FirstPartitioner implements
Partitioner<StringStringPairAsce, Text> {
@Override
public int getPartition(StringStringPairAsce key, Text value,
int numPartitions) {
return key.getFirst().hashCode() & Integer.MAX_VALUE
% numPartitions;
}
@Override
public void configure(JobConf job) {
}
}
@Override
public int run(String[] args) throws Exception {
JobConf conf = new JobConf(getConf(), Bayes.class);
conf.setJobName("Bayes");
//conf.setNumMapTasks(200);
// 设置Map输出的key和value的类型
conf.setMapOutputKeyClass(StringStringPairAsce.class);
conf.setMapOutputValueClass(Text.class);
// 设置Reduce输出的key和value的类型
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// 设置Mapper和Reducer
conf.setMapperClass(BayesMapper.class);
conf.setReducerClass(BayesReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
conf.setPartitionerClass(FirstPartitioner.class);
conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
// 设置输入输出目录
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Bayes(), args);
System.exit(exitCode);
}
}
youth high no fair no
youth high no excellent no
middle high no fair yes
senior medium no fair yes
senior low yes fair yes
senior low yes excellent no
middle low yes excellent yes
youth medium no fair no
youth low yes fair yes
senior medium yes fair yes
youth medium yes excellent yes
middle medium no excellent yes
middle high yes fair yes
senior medium no excellent no
====================================================================
package com.mahout.bayes;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.mahout.test.FirstGroupingComparator;
import com.mahout.test.StringStringPairAsce;
import com.mahout.test.ItemBasePass1.FirstPartitioner;
/**
* 贝叶斯算法实现
* @author clxin
*
*/
public class Bayes extends Configured implements Tool {
/**
* 把(x1,x2,..,xn,C)转换为
* C A1 x1
* C A1 x2
* @author clxin
*/
public static class BayesMapper extends MapReduceBase implements
Mapper<LongWritable, Text, StringStringPairAsce, Text> {
private StringStringPairAsce tKey = new StringStringPairAsce();
private Text tValue = new Text();
public void map(LongWritable key, Text value,
OutputCollector<StringStringPairAsce, Text> output, Reporter arg3)
throws IOException {
String [] strArr = value.toString().split("\t");
tKey.set("age"+"\t"+strArr[strArr.length-1],strArr[0]);
tValue.set(strArr[0]);
output.collect(tKey, tValue);
tKey.set("income"+"\t"+strArr[strArr.length-1],strArr[1]);
tValue.set(strArr[1]);
output.collect(tKey, tValue);
tKey.set("student"+"\t"+strArr[strArr.length-1],strArr[2]);
tValue.set(strArr[2]);
output.collect(tKey, tValue);
tKey.set("credit_rating"+"\t"+strArr[strArr.length-1],strArr[3]);
tValue.set(strArr[3]);
output.collect(tKey, tValue);
}
}
public static class BayesReducer extends MapReduceBase implements
Reducer<StringStringPairAsce, Text, Text, Text> {
Text tKey = new Text();
Text tValue= new Text();
@Override
public void reduce(StringStringPairAsce key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
int pCcount = 1;
int pXcount = 1;
Map xMap = new HashMap<String,String>();
String tmpValue=values.next().toString();
while(values.hasNext()){
pCcount++;
String newValue=values.next().toString();
if(!tmpValue.equals(newValue)){
xMap.put(tmpValue, pXcount);
tmpValue = newValue;
pXcount=1;
}else{
pXcount++;
}
}
xMap.put(tmpValue, pXcount);
Set<Entry<String, String>> sets = xMap.entrySet();
for (Entry<String, String> entry : sets) {
tKey.set(key.getFirst() + "\t" + entry.getKey());
String [] xValue = key.getFirst().split("\t");
Object ob = entry.getValue();
tValue.set(pCcount+"\t"+ob.toString());
System.out.println("p("+xValue[0]+"="+entry.getKey()+"|"+"class="+xValue[1]+
")="+ob.toString()+"/"+pCcount);
output.collect(tKey, tValue);
}
}
}
public static class FirstPartitioner implements
Partitioner<StringStringPairAsce, Text> {
@Override
public int getPartition(StringStringPairAsce key, Text value,
int numPartitions) {
return key.getFirst().hashCode() & Integer.MAX_VALUE
% numPartitions;
}
@Override
public void configure(JobConf job) {
}
}
@Override
public int run(String[] args) throws Exception {
JobConf conf = new JobConf(getConf(), Bayes.class);
conf.setJobName("Bayes");
//conf.setNumMapTasks(200);
// 设置Map输出的key和value的类型
conf.setMapOutputKeyClass(StringStringPairAsce.class);
conf.setMapOutputValueClass(Text.class);
// 设置Reduce输出的key和value的类型
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// 设置Mapper和Reducer
conf.setMapperClass(BayesMapper.class);
conf.setReducerClass(BayesReducer.class);
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
conf.setPartitionerClass(FirstPartitioner.class);
conf.setOutputValueGroupingComparator(FirstGroupingComparator.class);
// 设置输入输出目录
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
return 0;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Bayes(), args);
System.exit(exitCode);
}
}
0 0
- 分类算法--贝叶斯分类法(MapReduce实现)代码实现<转>
- 分类算法--贝叶斯分类法(MapReduce实现)
- 分类算法--贝叶斯分类法(MapReduce实现)<转>
- 数据挖掘分类算法之贝叶斯分类法原理及C++实现
- 贝叶斯分类算法,python实现
- 朴素贝叶斯算法实现分类问题(三类)matlab代码
- 逻辑回归和朴素贝叶斯算法实现二值分类(matlab代码)
- 有监督分类:概率分类法(Logistic)
- AdaBoost分类算法实现
- 数据分类K—means 算法的python代码实现
- 朴素贝叶斯实现文本分类部分代码(2)
- 【JAVA实现】朴素贝叶斯分类算法
- java实现朴素贝叶斯分类算法
- 朴素贝叶斯分类算法的Python实现
- 朴素贝叶斯分类算法python实现
- 左右值无限分类实现算法[转]
- 左右值无限分类实现算法[转]
- 模式识别几何分类算法实现(一)
- hbase基本的表管理和访问
- 数据挖掘--kmeans聚类算法mapreduce实现代码<转>
- Android中FLAG_ACTIVITY_CLEAR_TASK的作用
- 分类算法--贝叶斯分类法(MapReduce实现)<转>
- 分区索引碎片整理Script
- 分类算法--贝叶斯分类法(MapReduce实现)代码实现<转>
- UVA 11072 - Points(凸包+点在多边形内判定)
- C++ 给函数传递参数的两种方式
- android 侦听home键
- 主成分分析(Principal components analysis)
- ERP和其他管理软件之间的逻辑关系
- hdu 1200 To and Fro
- POJ 3076 Sudoku (dancing links)
- hdu 4612 Warm up 双连通缩点+树的直径