01利用sklearn练习机器学习--sklearn之感知机perceptron模型实践

来源:互联网 发布:linux 查看字体 编辑:程序博客网 时间:2024/06/05 20:35

1 介绍

台湾大学林轩田教授机器学习基石
02 Learning to Answer Yes or No
学习利用感知机来处理二分类问题

2 自己实现

# -*- coding: utf-8 -*-
"""Perceptron Learning Algorithm (PLA) on a tiny hand-written 2-D data set.

Every sample is ``[1, x1, x2]``: the leading constant 1 is the bias input,
so the threshold is learned as the first weight ``w[0]``.
"""
import operator  # not used by this script; retained from the original listing
import time

import numpy as np


def createTrainDataSet():
    """Return ``(trainData, label)`` for the six hard-coded training samples.

    The first component of every sample is the constant 1 that pairs with
    the threshold weight; labels are +1 / -1.
    """
    trainData = [[1, 1, 4],
                 [1, 2, 3],
                 [1, -2, 3],
                 [1, -2, 2],
                 [1, 0, 1],
                 [1, 1, 2]]
    label = [1, 1, 1, -1, -1, -1]
    return trainData, label


def createTestDataSet():
    """Return the four hard-coded samples that are classified after training."""
    testData = [[1, 1, 1],
                [1, 2, 0],
                [1, 2, 4],
                [1, 1, 3]]
    return testData


def sigmoid(X):
    """Return the sign of ``X``: 1 if positive, -1 if negative, 0 if zero.

    Despite its name this is the sign (step) function, not the logistic
    sigmoid; the name is kept so existing callers continue to work.
    ``X`` may be a plain number or a one-element array/matrix.
    """
    # .item() unwraps one-element arrays explicitly instead of relying on
    # float(array), which NumPy has deprecated for arrays with ndim > 0.
    X = np.asarray(X, dtype=float).item()
    if X > 0:
        return 1
    elif X < 0:
        return -1
    else:
        return 0


def pla(traindataIn, trainlabelIn, max_epochs=None):
    """Train a perceptron and return its weights as an ``(n, 1)`` float array.

    Args:
        traindataIn: 2-D sequence of samples, one row per sample.
        trainlabelIn: sequence of labels (+1 / -1), one per sample.
        max_epochs: optional cap on the number of full passes over the data.
            ``None`` (the default) keeps looping until a whole pass makes no
            update, which never terminates if the data is not linearly
            separable.

    Returns:
        The weight column vector, initialised to all ones and updated with
        ``w += y_i * x_i`` for every sample whose predicted sign differs
        from its label.
    """
    samples = np.asarray(traindataIn, dtype=float)
    labels = np.asarray(trainlabelIn, dtype=float).ravel()
    _, n = samples.shape
    w = np.ones((n, 1))
    epoch = 0
    while max_epochs is None or epoch < max_epochs:
        epoch += 1
        iscompleted = True
        for x_i, y_i in zip(samples, labels):
            if sigmoid(np.dot(x_i, w[:, 0])) != y_i:
                iscompleted = False
                w[:, 0] += y_i * x_i
        if iscompleted:
            break
    return w


def classify(inX, w):
    """Classify one sample: return 1 if ``w . inX > 0``, otherwise -1.

    Both arguments are flattened first so that ``w`` may be the ``(n, 1)``
    column returned by :func:`pla`.  (Multiplying an ``(n, 1)`` array by a
    length-``n`` sample broadcasts to an ``n x n`` outer product, whose sum
    is ``sum(w) * sum(inX)`` rather than the dot product.)
    """
    weights = np.asarray(w, dtype=float).ravel()
    sample = np.asarray(inX, dtype=float).ravel()
    result = sigmoid(np.dot(weights, sample))
    if result > 0:
        return 1
    else:
        return -1


def plotBestFit(w):
    """Scatter-plot the training set and draw the decision boundary for ``w``.

    ``w`` holds three weights ``(w0, w1, w2)``; the boundary is the line
    ``w0 + w1*x1 + w2*x2 = 0``.
    """
    # Imported here so the learning functions above can be used without
    # matplotlib being installed.
    import matplotlib.pyplot as plt

    traindata, label = createTrainDataSet()
    dataArr = np.array(traindata)
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for row, lab in zip(dataArr, label):
        if int(lab) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    w0, w1, w2 = np.asarray(w, dtype=float).ravel()
    if w2 != 0:
        x = np.arange(-3.0, 3.0, 0.1)
        y = (-w0 - w1 * x) / w2
        ax.plot(x, y)
    elif w1 != 0:
        # w2 == 0 means the boundary is the vertical line x1 = -w0 / w1.
        ax.axvline(-w0 / w1)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


def classifyall(datatest, w):
    """Return the list of :func:`classify` results for every sample in ``datatest``."""
    return [classify(data, w) for data in datatest]


def main():
    """Train on the built-in data, classify the test set, plot and print results."""
    trainData, label = createTrainDataSet()
    testdata = createTestDataSet()
    w = pla(trainData, label)
    result = classifyall(testdata, w)
    plotBestFit(w)
    print(w)
    print(result)


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is its replacement.
    start = time.perf_counter()
    main()
    end = time.perf_counter()
    print('finish all in %s' % str(end - start))

参考:http://www.mamicode.com/info-detail-1434773.html

3 利用sklearn

这里涉及利用sklearn来生成数据,训练,保存,评估模型以及利用模型来预测等例子。

# -*- coding: utf-8 -*-
"""Generate synthetic 2-D data, then train, save, evaluate and plot a
scikit-learn ``Perceptron``."""
import numpy as np


def decision_line_y(line_x, coef, intercept):
    """Return the x2 values of the decision line for the given x1 values.

    The separating hyperplane is ``w1*x1 + w2*x2 + b = 0``, hence
    ``x2 = -(w1*x1 + b) / w2``.  The intercept has to be divided by ``w2``
    as well; subtracting it unscaled is only correct when it is zero.

    Args:
        line_x: sequence of x1 coordinates.
        coef: the two weights, e.g. ``clf.coef_`` of shape ``(1, 2)``.
        intercept: the bias term, e.g. ``clf.intercept_`` of shape ``(1,)``.
    """
    w1, w2 = np.asarray(coef, dtype=float).ravel()
    b = np.asarray(intercept, dtype=float).ravel()[0]
    return -(w1 * np.asarray(line_x, dtype=float) + b) / w2


def main():
    """Run the whole example: data generation, training, persistence,
    scoring, single-sample prediction and plotting."""
    from matplotlib import pyplot as plt
    from sklearn.datasets import make_classification
    from sklearn.linear_model import Perceptron
    try:
        import joblib
    except ImportError:
        # Older scikit-learn releases bundled joblib under sklearn.externals.
        from sklearn.externals import joblib

    # n_samples: number of generated samples
    # n_features: number of features per sample
    #             (= n_informative + n_redundant + n_repeated)
    # n_informative: number of informative features
    # n_redundant: number of redundant features (random linear combinations
    #              of the informative ones)
    # n_clusters_per_class: number of clusters each class is made of
    x, y = make_classification(n_samples=1000, n_features=2, n_redundant=0,
                               n_informative=1, n_clusters_per_class=1)

    # Training and test split.
    x_data_train = x[:800, :]
    x_data_test = x[800:, :]
    y_data_train = y[:800]
    y_data_test = y[800:]

    # Positive and negative examples, used only for the scatter plot.
    is_positive = y == 1
    is_negative = y == 0
    positive_x1 = x[is_positive, 0]
    positive_x2 = x[is_positive, 1]
    negative_x1 = x[is_negative, 0]
    negative_x2 = x[is_negative, 1]

    # Define the perceptron.  The original ``n_iter=30`` argument no longer
    # exists; ``max_iter=30`` with ``tol=None`` asks for 30 passes without
    # tolerance-based early stopping.
    clf = Perceptron(fit_intercept=False, max_iter=30, tol=None, shuffle=False)
    # Train on the training data.
    clf.fit(x_data_train, y_data_train)
    # Learned weight matrix.  One run printed [[-0.38478876, 4.41537463]];
    # the data is random, so values differ between runs.
    print(clf.coef_)
    # Intercept of the hyperplane; [0.] here because fit_intercept=False.
    print(clf.intercept_)

    # Save the model.
    joblib.dump(clf, 'pla.model')

    # Reload it and validate on the test data.
    CLF = joblib.load('pla.model')
    acc = CLF.score(x_data_test, y_data_test)
    # One run printed 0.995.
    print(acc)

    # Predict a single sample; predict() expects a 2-D array of samples.
    print("[2.0,1.0] predict:", CLF.predict([[2.0, 1.0]]))

    # Scatter plot of positive and negative examples.
    plt.scatter(positive_x1, positive_x2, c='red')
    plt.scatter(negative_x1, negative_x2, c='blue')

    # Draw the hyperplane (a straight line in this 2-D example).
    line_x = np.arange(-4, 4)
    line_y = decision_line_y(line_x, CLF.coef_, CLF.intercept_)
    plt.plot(line_x, line_y)
    plt.show()


if __name__ == '__main__':
    main()

这里写图片描述

参考https://zhuanlan.zhihu.com/p/27152953