Machine Learning in Action: Logistic Regression

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numpy import *

def loadDataSet():
    dataMat = []; labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        # prepend a constant 1.0 so weights[0] acts as the intercept
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append(int(lineArr[2]))
    return dataMat, labelMat

def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))

def gradAscent(dataMatIn, classLabels):
    # batch gradient ascent
    dataMatrix = mat(dataMatIn)                 # convert to NumPy matrix
    labelMat = mat(classLabels).transpose()     # convert to NumPy matrix
    m, n = shape(dataMatrix)
    alpha = 0.001       # step size
    maxCycles = 500     # number of iterations
    weights = ones((n, 1))
    for k in range(maxCycles):                  # heavy on matrix operations
        h = sigmoid(dataMatrix * weights)       # matrix mult
        error = (labelMat - h)                  # vector subtraction
        weights = weights + alpha * dataMatrix.transpose() * error  # W = W + a*∇f(W)
    return weights

def plotBestFit(weights):
    import matplotlib.pyplot as plt
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # decision boundary: w0 + w1*x1 + w2*x2 = 0, solved for x2
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()

def stocGradAscent0(dataMatrix, classLabels, numIter=150):
    # stochastic gradient ascent: update the weights with one sample at a time
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)   # initialize to all ones
    for j in range(numIter):
        for i in range(m):
            h = sigmoid(sum(dataMatrix[i] * weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
    return weights

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    # improved version: alpha shrinks as iterations proceed, and samples are
    # picked at random without replacement within each pass
    m, n = shape(dataMatrix)
    weights = ones(n)   # initialize to all ones
    for j in range(numIter):
        dataIndex = list(range(m))  # list() so del works under Python 3
        for i in range(m):
            # alpha decreases with iteration but never reaches 0 thanks to the constant
            alpha = 4 / (1.0 + j + i) + 0.0001
            randIndex = int(random.uniform(0, len(dataIndex)))
            # index through dataIndex so each sample is used once per pass
            # (using randIndex directly, as in the book, is a known erratum)
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatrix[sample]
            del(dataIndex[randIndex])
    return weights

if __name__ == '__main__':
    dataArr, labelMat = loadDataSet()
    weights = gradAscent(dataArr, labelMat)
    plotBestFit(weights.getA())   # getA() converts the matrix to an ndarray
    weights = stocGradAscent0(array(dataArr), labelMat)
    plotBestFit(weights)
    weights = stocGradAscent1(array(dataArr), labelMat)
    plotBestFit(weights)
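For reference, the one-line update weights = weights + alpha * dataMatrix.transpose() * error is gradient ascent on the log-likelihood of the logistic model; this derivation is standard background, not part of the original post. With h(x) = σ(wᵀx):

$$\ell(w) = \sum_{i=1}^{m}\left[y_i \log h(x_i) + (1-y_i)\log\bigl(1-h(x_i)\bigr)\right], \qquad h(x)=\sigma(w^{\top}x)=\frac{1}{1+e^{-w^{\top}x}}$$

$$\nabla_w\,\ell(w) = \sum_{i=1}^{m}\bigl(y_i - h(x_i)\bigr)\,x_i = X^{\top}(y-h), \qquad w \leftarrow w + \alpha\,X^{\top}(y-h)$$

which is exactly the W = W + a*∇f(W) comment in the code. The stochastic variants apply the same update using a single term (y_i - h(x_i))·x_i per step instead of the full sum. plotBestFit draws the line where σ(w₀ + w₁x₁ + w₂x₂) = 0.5, i.e. w₀ + w₁x₁ + w₂x₂ = 0, solved for x₂.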


testSet.txt (columns: x1, x2, class label):

-0.017612    14.053064    0
-1.395634    4.662541    1
-0.752157    6.538620    0
-1.322371    7.152853    0
0.423363    11.054677    0
0.406704    7.067335    1
0.667394    12.741452    0
-2.460150    6.866805    1
0.569411    9.548755    0
-0.026632    10.427743    0
0.850433    6.920334    1
1.347183    13.175500    0
1.176813    3.167020    1
-1.781871    9.097953    0
-0.566606    5.749003    1
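To actually use the trained weights, predictions come from thresholding sigmoid(w·x) at 0.5. A minimal sketch follows; the classifyVector helper and the accuracy loop are illustrations added here, not part of the original post, and they assume loadDataSet and stocGradAscent1 from the listing above are in scope.

import numpy as np

def classifyVector(inX, weights):
    # hypothetical helper: class 1 if sigmoid(w . x) > 0.5, else class 0
    prob = 1.0 / (1.0 + np.exp(-np.dot(inX, weights)))
    return 1 if prob > 0.5 else 0

dataArr, labelMat = loadDataSet()
weights = stocGradAscent1(np.array(dataArr), labelMat)
hits = 0
for i in range(len(labelMat)):
    if classifyVector(np.array(dataArr[i]), weights) == labelMat[i]:
        hits += 1
print('training accuracy: %.2f' % (hits / float(len(labelMat))))

Note this only checks the fit on the training data; a real evaluation would score the classifier on held-out samples.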
