数据挖掘-Naive Bayes(朴素贝叶斯)算法实现

来源:互联网 发布:用友远程软件 编辑:程序博客网 时间:2024/06/05 15:12
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Minimal Naive Bayes classifier over categorical, space-separated features.
 *
 * <p>Training data is plain text: the first line is a header and is always
 * skipped; every following line is {@code <class-label> <f1> <f2> ...},
 * separated by single spaces. Features are compared positionally, as exact
 * strings. No smoothing is applied, so a feature value never observed with a
 * class drives that class's score to zero.
 */
public class NaiveBeyes {

    /** Training file read by the no-argument constructor. */
    private static final String DEFAULT_INPUT_PATH =
            "F:/数据挖掘--算法实现/NaiveBeyes算法/input.txt";

    /** Feature part of each training row (everything after the first space). */
    List<String> data_var = new ArrayList<String>();

    /** Class label of each training row; index-aligned with {@link #data_var}. */
    List<String> data_tag = new ArrayList<String>();

    /**
     * Loads the training set from the default input file.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public NaiveBeyes() throws IOException {
        this(DEFAULT_INPUT_PATH);
    }

    /**
     * Loads the training set from the given file. The file is decoded with the
     * platform default charset, and is closed before this constructor returns,
     * including when reading fails.
     *
     * @param path path of the training file
     * @throws IOException if the file cannot be opened or read
     */
    public NaiveBeyes(String path) throws IOException {
        Reader source = new FileReader(path);
        try {
            load(source);
        } finally {
            source.close();
        }
    }

    /**
     * Loads the training set from an already opened character source. The
     * caller keeps ownership of {@code source}; it is not closed here.
     *
     * @param source training data in the format described on the class
     * @throws IOException if reading fails
     */
    public NaiveBeyes(Reader source) throws IOException {
        load(source);
    }

    /**
     * Reads all training rows from {@code source} into the two row lists.
     *
     * @throws IllegalArgumentException if a non-blank data line carries a
     *         label but no features
     */
    private void load(Reader source) throws IOException {
        BufferedReader reader = (source instanceof BufferedReader)
                ? (BufferedReader) source
                : new BufferedReader(source);

        int lineNumber = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            lineNumber++;
            if (lineNumber == 1) {
                continue; // header line
            }
            if (line.trim().length() == 0) {
                continue; // tolerate blank lines, e.g. a trailing newline
            }
            // Limit 2: the label goes to [0], the untouched feature list to [1].
            String[] parts = line.split(" ", 2);
            if (parts.length < 2) {
                throw new IllegalArgumentException(
                        "line " + lineNumber + " has a class label but no features: " + line);
            }
            this.data_tag.add(parts[0]);
            this.data_var.add(parts[1]);
        }
    }

    /**
     * Classifies one sample.
     *
     * <p>For every class Ci seen in training, the relative score
     * P(X|Ci)P(Ci) is computed, where P(X|Ci) is the product over the
     * sample's features of (rows of Ci whose feature at that position equals
     * the sample's) / (rows of Ci), and P(Ci) is (rows of Ci) / (all rows).
     * Each score is printed to standard output.
     *
     * <p>A training row with no feature at some position simply does not
     * match there. If the sample has fewer features than the training rows,
     * only the leading positions are compared.
     *
     * @param var_predict the sample's features, separated by single spaces
     * @return the class with the strictly highest score; the empty string if
     *         every score is zero or there is no training data
     */
    public String NaiveBeyes_method(String var_predict) {
        String[] query = var_predict.split(" ");

        // Single pass over the training rows: rows per class, and for every
        // class the number of rows agreeing with the query at each position.
        Map<String, Integer> rowsPerClass = new HashMap<String, Integer>();
        Map<String, int[]> matchesPerClass = new HashMap<String, int[]>();
        for (int i = 0; i < this.data_var.size(); i++) {
            String tag = this.data_tag.get(i);

            Integer seen = rowsPerClass.get(tag);
            rowsPerClass.put(tag, seen == null ? 1 : seen + 1);

            int[] matches = matchesPerClass.get(tag);
            if (matches == null) {
                matches = new int[query.length];
                matchesPerClass.put(tag, matches);
            }

            String[] row = this.data_var.get(i).split(" ");
            int comparable = Math.min(row.length, query.length);
            for (int j = 0; j < comparable; j++) {
                if (row[j].equals(query[j])) {
                    matches[j]++;
                }
            }
        }

        String tag_predict = "";
        float max_p = 0;
        int totalRows = this.data_var.size();
        for (Map.Entry<String, Integer> entry : rowsPerClass.entrySet()) {
            String tag = entry.getKey();
            int classRows = entry.getValue();

            // P(X|Ci): product of the per-feature conditional frequencies.
            float p = 1;
            for (int count : matchesPerClass.get(tag)) {
                p = p * ((float) count) / classRows;
            }
            // Multiply by the prior P(Ci).
            p = p * ((float) classRows) / totalRows;

            // Strict comparison: on a tie the class visited first is kept.
            if (max_p < p) {
                max_p = p;
                tag_predict = tag;
            }
            System.out.println("分到" + tag + "的条件概率相对数为:" + p);
        }
        return tag_predict;
    }

    /** Trains from the default input file and classifies one fixed sample. */
    public static void main(String[] args) throws IOException {
        NaiveBeyes a = new NaiveBeyes();
        System.out.println("变量预测为类别:" + a.NaiveBeyes_method("youth medium yes fair"));
    }
}

训练样本:

类别 变量
no youth high no fair
no youth high no excellent
yes middle_aged high no fair
yes senior medium no fair
yes senior low yes fair
no senior low yes excellent
yes middle_aged low yes excellent
no youth medium no fair
yes youth low yes fair
yes senior medium yes fair
yes youth medium yes excellent
yes middle_aged medium no excellent
yes middle_aged high yes fair
no senior medium no excellent

输入预测变量:

"youth medium yes fair"


输出结果:

分到no的条件概率相对数为:0.006857143
分到yes的条件概率相对数为:0.028218696
变量预测为类别:yes

0 0
原创粉丝点击