Learning sklearn: Naive Bayes Classification


This post mainly introduces the naive Bayes classifiers in sklearn.

1. Gaussian naive Bayes classifier
class sklearn.naive_bayes.GaussianNB
The Gaussian naive Bayes classifier is normally constructed without any arguments; there are no parameters that have to be specified.
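A minimal usage sketch (not part of the original post; it assumes the same digits dataset used by the full script below and the current sklearn.model_selection API):

    from sklearn.datasets import load_digits
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    # Load the digits dataset and hold out 25% of it for testing
    X, y = load_digits(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

    clf = GaussianNB()            # constructed with no arguments
    clf.fit(X_train, y_train)
    print("testing score: %.2f" % clf.score(X_test, y_test))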

2. Multinomial naive Bayes classifier
class sklearn.naive_bayes.MultinomialNB(alpha=1.0, fit_prior=True, class_prior=None)
The parameters are:
alpha: a float, the additive (Laplace/Lidstone) smoothing parameter.
fit_prior: a boolean. If True, the class priors P(y=c_k) are learned from the data; if False, a uniform prior is used instead.
class_prior: an array specifying the prior probability P(y=c_k) of each class. If it is given, the class priors are not learned from the data.
A short example follows below.
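As an illustration of these parameters, here is a minimal sketch (the count matrix and parameter values are made up purely for demonstration and are not from the original post):

    import numpy as np
    from sklearn.naive_bayes import MultinomialNB

    # Toy count features for 6 samples in 2 classes (hypothetical values)
    X = np.array([[2, 1, 0],
                  [3, 0, 1],
                  [2, 0, 1],
                  [0, 2, 4],
                  [1, 3, 5],
                  [0, 4, 3]])
    y = np.array([0, 0, 0, 1, 1, 1])

    # alpha sets the smoothing strength; class_prior=None means P(y=c_k) is
    # learned from the data because fit_prior=True
    clf = MultinomialNB(alpha=0.5, fit_prior=True, class_prior=None)
    clf.fit(X, y)
    print(clf.predict([[1, 1, 2]]))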
3. Bernoulli naive Bayes classifier
class sklearn.naive_bayes.BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None)
The parameters are:
alpha: a float, the additive (Laplace/Lidstone) smoothing parameter.
binarize: a float or None.
If it is a float, it is used as a threshold: feature values greater than it are mapped to 1 and the rest to 0.
If it is None, the input is assumed to consist of binary feature vectors already.
fit_prior: a boolean. If True, the class priors P(y=c_k) are learned from the data; if False, a uniform prior is used instead.
class_prior: an array specifying the prior probability P(y=c_k) of each class. If it is given, the class priors are not learned from the data.
A short example of the binarize parameter is shown after this list.
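A minimal sketch of the binarize parameter (the feature values and the threshold 2.5 are hypothetical, chosen only to show the thresholding):

    import numpy as np
    from sklearn.naive_bayes import BernoulliNB

    # Continuous toy features (hypothetical values)
    X = np.array([[0.5, 3.0, 1.2],
                  [4.0, 0.1, 2.8],
                  [3.5, 2.9, 0.3],
                  [0.2, 4.1, 3.3]])
    y = np.array([0, 1, 1, 0])

    # binarize=2.5: values greater than 2.5 are treated as 1, the rest as 0
    clf = BernoulliNB(alpha=1.0, binarize=2.5, fit_prior=True)
    clf.fit(X, y)
    print(clf.predict(X))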

The full experiment script is below. Note that the sklearn.cross_validation module used in the original code has since been removed from scikit-learn; model_selection.train_test_split is used here instead.

from sklearn import datasets, model_selection, naive_bayes  # model_selection replaces the removed cross_validation module
import numpy as np
import matplotlib.pyplot as plt

def show_digits():
    # Show the first 25 digit images and print the feature vector of the first sample
    digits = datasets.load_digits()
    fig = plt.figure()
    print('vector from image 0:', digits.data[0])
    for i in range(25):
        ax = fig.add_subplot(5, 5, i + 1)
        ax.imshow(digits.images[i], cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()

def load_data():
    # Split the digits dataset into a 75%/25% train/test split
    digits = datasets.load_digits()
    return model_selection.train_test_split(digits.data, digits.target, test_size=0.25,
                                            random_state=0)

# Check the performance of the Gaussian naive Bayes classifier
def test_GaussianNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.GaussianNB()
    cls.fit(X_train, y_train)
    print("training score:%.2f" % (cls.score(X_train, y_train)))
    print("testing score:%.2f" % (cls.score(X_test, y_test)))

# Test the multinomial naive Bayes classifier
def test_MultinomialNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.MultinomialNB()
    cls.fit(X_train, y_train)
    print("training score:%.2f" % (cls.score(X_train, y_train)))
    print("testing score:%.2f" % (cls.score(X_test, y_test)))

# Examine how different alpha values affect MultinomialNB
def test_MultinomialNB_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    training_score = []
    testing_score = []
    for alpha in alphas:
        cls = naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train, y_train)
        training_score.append(cls.score(X_train, y_train))
        testing_score.append(cls.score(X_test, y_test))
    # Plot score against alpha
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, training_score, label="training score")
    ax.plot(alphas, testing_score, label="testing score")
    ax.set_xlabel('alpha')
    ax.set_ylabel('score')
    ax.set_title("MultinomialNB")
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

# Check the performance of the Bernoulli naive Bayes classifier
def test_BernoulliNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.BernoulliNB()
    cls.fit(X_train, y_train)
    print("training score:%.2f" % (cls.score(X_train, y_train)))
    print("testing score:%.2f" % (cls.score(X_test, y_test)))

# Examine how different alpha values affect BernoulliNB
def test_BernoulliNB_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    training_score = []
    testing_score = []
    for alpha in alphas:
        cls = naive_bayes.BernoulliNB(alpha=alpha)
        cls.fit(X_train, y_train)
        training_score.append(cls.score(X_train, y_train))
        testing_score.append(cls.score(X_test, y_test))
    # Plot score against alpha
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, training_score, label="training score")
    ax.plot(alphas, testing_score, label="testing score")
    ax.set_xlabel('alpha')
    ax.set_ylabel('score')
    ax.set_title("BernoulliNB")
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

# Examine how different binarization thresholds affect BernoulliNB
def test_BernoulliNB_binarize(*data):
    X_train, X_test, y_train, y_test = data
    min_x = min(np.min(X_train.ravel()), np.min(X_test.ravel())) - 0.1
    max_x = max(np.max(X_train.ravel()), np.max(X_test.ravel())) + 0.1  # pad the range slightly on both sides
    binarizes = np.linspace(min_x, max_x, endpoint=True, num=100)
    training_score = []
    testing_score = []
    for binarize in binarizes:
        cls = naive_bayes.BernoulliNB(binarize=binarize)
        cls.fit(X_train, y_train)
        training_score.append(cls.score(X_train, y_train))
        testing_score.append(cls.score(X_test, y_test))
    # Plot score against the binarization threshold
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(binarizes, training_score, label="training score")
    ax.plot(binarizes, testing_score, label="testing score")
    ax.set_xlabel('binarize')
    ax.set_ylabel('score')
    ax.set_title("BernoulliNB")
    ax.legend(loc="best")
    plt.show()

if __name__ == "__main__":
    # show_digits()
    X_train, X_test, y_train, y_test = load_data()
    # test_GaussianNB(X_train, X_test, y_train, y_test)
    # test_MultinomialNB(X_train, X_test, y_train, y_test)
    # test_MultinomialNB_alpha(X_train, X_test, y_train, y_test)
    # test_BernoulliNB_alpha(X_train, X_test, y_train, y_test)
    test_BernoulliNB_binarize(X_train, X_test, y_train, y_test)

Here are some of the plots produced by the code above, for reference.
[Figure: MultinomialNB training/testing score vs. alpha]
This plot shows the effect of alpha on the multinomial naive Bayes classifier: once alpha becomes too large, accuracy drops sharply. The reason is that in multinomial naive Bayes the smoothed conditional probabilities are estimated as

P(X^(j) = a_jl | y = c_k) = (N_kjl + α) / (N_k + n·α)

where N_k is the number of training samples of class c_k, N_kjl is the number of those samples whose j-th feature takes the value a_jl, and n is the number of distinct values that feature can take. As α → ∞, every such probability tends to 1/n, so the features stop carrying any information about the class.
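As a quick sanity check of this limit (a sketch that is not part of the original post; it uses sklearn's MultinomialNB, whose multinomial event model makes the analogous limiting value 1/n_features):

    import numpy as np
    from sklearn.datasets import load_digits
    from sklearn.naive_bayes import MultinomialNB

    X, y = load_digits(return_X_y=True)
    clf = MultinomialNB(alpha=1e8).fit(X, y)   # extremely strong smoothing
    probs = np.exp(clf.feature_log_prob_)      # per-class feature probabilities, shape (10, 64)
    # With huge alpha every probability is close to 1/64, i.e. the features become uninformative
    print(probs.min(), probs.max(), 1.0 / X.shape[1])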

[Figure: BernoulliNB training/testing score vs. alpha]
The same reasoning applies to the Bernoulli naive Bayes classifier: its accuracy also degrades once alpha becomes too large.

[Figure: BernoulliNB training/testing score vs. the binarize threshold]