详细解释《机器学习实战》第5章logistic回归 第5-1程序(梯度下降法推导)和梯度下降法实现对率回归

来源:互联网 发布:excel画出数据形状 编辑:程序博客网 时间:2024/05/28 23:12

以下推导过程参考自他人的文章:

这里写图片描述

这里写图片描述

梯度下降法实现对率回归

数据集

这里写图片描述

代码

# -*- coding: utf-8 -*-
# Logistic regression on the two-feature watermelon data set, trained with
# batch gradient ascent (gradAscend) or stochastic gradient ascent
# (stocGradAscend1), followed by a plot of the fitted decision boundary.
# Written so that it runs unchanged on both Python 2 and Python 3.
from log import *
import pandas as pd
from pandas import *
import matplotlib.pyplot as plt
import numpy as np
from numpy import *

# Load the data into X (features) and y (labels).
# NOTE(review): rows 0-1 of the CSV are used as the two features and row 3 as
# the label, so row 2 is never read -- confirm this matches the file layout.
dataset = np.loadtxt(r'C:\Users\zmy\Desktop\titanic\watermelon.csv', delimiter=",")
X = dataset[0:2, :]
y = dataset[3, :]
X = X.transpose()
y = y.transpose()

df = pd.DataFrame(X, columns=['density', 'ratio_sugar'])
m, n = np.shape(df.values)
# Constant column of ones so that weights[0] acts as the intercept term.
df['norm'] = np.ones((m, 1))
dataMat = np.array(df[['norm', 'density', 'ratio_sugar']].values[:, :])
labelMat = np.array(y).transpose()


def sigmoid(inX):
    """Return the logistic function 1 / (1 + exp(-inX)), element-wise."""
    return 1.0 / (1 + np.exp(-inX))


def gradAscend(dataMatIn, classLabels):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: array-like of shape (m, n); one sample per row, including
            the constant intercept column.
        classLabels: array-like holding the m class labels (0 or 1).

    Returns:
        ndarray of shape (n, 1) with the learned weights.
    """
    dataMatrix = np.asarray(dataMatIn, dtype=float)
    # Use the labels that were passed in (not the module-level labelMat) and
    # force a column vector so that `labels - h` stays (m, 1) instead of
    # broadcasting to (m, m).
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    n_features = np.shape(dataMatrix)[1]
    alpha = 0.1        # learning rate
    maxCycle = 500     # number of full passes over the data
    weights = np.ones((n_features, 1))
    for _ in range(maxCycle):
        h = sigmoid(np.dot(dataMatrix, weights))
        error = labels - h
        # Matrix product X^T * error gives the (n, 1) gradient of the
        # log-likelihood; element-wise `*` on ndarrays would not.
        weights = weights + alpha * np.dot(dataMatrix.transpose(), error)
    return weights


def stocGradAscend1(dataMat, labelMat, numIter=50):
    """Fit logistic-regression weights with stochastic gradient ascent.

    Every pass visits each sample exactly once in random order and uses a
    step size that shrinks as the pass and step counters grow.

    Args:
        dataMat: array-like of shape (m, n); one sample per row.
        labelMat: array-like holding the m class labels (0 or 1).
        numIter: number of passes over the data.

    Returns:
        ndarray of shape (n,) with the learned weights.
    """
    print(dataMat)
    print(labelMat)
    samples = np.asarray(dataMat, dtype=float)
    labels = np.asarray(labelMat, dtype=float).ravel()
    n_samples, n_features = np.shape(samples)
    weights = np.ones(n_features)
    for j in range(numIter):
        # A real list is required: entries are deleted as they are used, and
        # a Python 3 range object does not support item deletion.
        dataIndex = list(range(n_samples))
        for i in range(n_samples):
            # Decaying step size with a constant floor of 0.2.
            alpha = 40 / (1.0 + j + i) + 0.2
            randIndex_temp = int(np.random.uniform(0, len(dataIndex)))
            randIndex = dataIndex[randIndex_temp]
            h = sigmoid(np.sum(samples[randIndex] * weights))
            error = labels[randIndex] - h
            weights = weights + alpha * error * samples[randIndex]
            # Sample without replacement within a pass.
            del dataIndex[randIndex_temp]
    return weights


def plotBestFit(weights):
    """Scatter the module-level data set and draw the decision boundary.

    Reads the module-level ``dataMat`` and ``labelMat``.

    Args:
        weights: the three weights (intercept, density, ratio_sugar); either
            a flat vector or an (n, 1) column is accepted.
    """
    import matplotlib.pyplot as plt

    dataArr = np.array(dataMat)
    # Flatten so the result of either training routine can be plotted.
    w = np.asarray(weights, dtype=float).ravel()
    n_samples = np.shape(dataArr)[0]
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for i in range(n_samples):
        if int(labelMat[i]) == 0:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
        else:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Scatter plot of the two classes.
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s', label='1')
    ax.scatter(xcord2, ycord2, s=30, c='g', label='0')
    # Points on the boundary line w0 + w1*x + w2*y = 0.
    x = np.arange(0.2, 0.8, 0.1)
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('density')
    plt.ylabel('ratio_sugar')
    plt.legend(loc='upper right')
    plt.title("random gradAscent logistic regression")
    plt.show()


# Train with stochastic gradient ascent and plot the result.
weights = stocGradAscend1(dataMat, labelMat)
plotBestFit(weights)

梯度下降法的结果:

这里写图片描述

随机梯度下降法的结果:

这里写图片描述

阅读全文
0 0