Python implementation of Stanford Machine Learning Exercise 2.1


1. From another implementation of gradient descent (https://stackoverflow.com/questions/18801002/fminunc-alternate-in-numpy) I picked up a useful NumPy idiom: there is no need to convert every NumPy array with np.mat(). np.dot() already performs matrix multiplication on plain ndarrays, and addressing individual elements of such arrays is also convenient.
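
For example, a minimal sketch of that idiom (the array values here are arbitrary, chosen only for illustration):

```python
import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0]])
v = np.array([0.5, -1.0])

print(np.dot(A, v))  # matrix-vector product on plain ndarrays, no np.mat() needed
print(A * A)         # `*` stays element-wise on ndarrays
print(A[0, 1])       # individual elements remain easy to address
```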

2. For the minimization step I used the minimize function from the scipy library (scipy.optimize.minimize).
An introduction to the Jacobian matrix: http://jacoxu.com/jacobian%E7%9F%A9%E9%98%B5%E5%92%8Chessian%E7%9F%A9%E9%98%B5/
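
As a minimal sketch of how minimize consumes an analytic gradient (here on a toy quadratic rather than the course data; the jac argument plays the role of the Jacobian of the scalar cost):

```python
import numpy as np
from scipy.optimize import minimize

def cost(theta):
    # toy quadratic with its minimum at (1, 2)
    return (theta[0] - 1) ** 2 + (theta[1] - 2) ** 2

def gradient(theta):
    # analytic gradient (the Jacobian of the scalar cost)
    return np.array([2 * (theta[0] - 1), 2 * (theta[1] - 2)])

result = minimize(fun=cost, x0=np.zeros(2), method='TNC', jac=gradient)
print(result.x)  # approximately [1. 2.]
```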

```python
# Machine Learning Online Class: Logistic Regression
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as scop


# ==================== Part 1: Loading and Visualizing Data ====================
def loadData(filename):
    fr = open(filename)
    arrayLines = fr.readlines()
    fr.close()
    numberOfLines = len(arrayLines)
    x = np.zeros((numberOfLines, 2))
    y = np.zeros((numberOfLines, 1))
    index = 0
    for line in arrayLines:
        line = line.strip()
        listFormLine = line.split(',')
        x[index, :] = listFormLine[:2]
        y[index] = listFormLine[-1]
        index += 1
    return x, y, numberOfLines


def plotData(x, y):
    f2 = plt.figure(2)
    y = y.ravel()  # flatten (m, 1) labels so np.where yields plain row indices
    idx_1 = np.where(y == 0)
    plt.scatter(x[idx_1, 0], x[idx_1, 1], marker='x', color='m', label='Not admitted', s=30)
    idx_2 = np.where(y == 1)
    plt.scatter(x[idx_2, 0], x[idx_2, 1], marker='+', color='c', label='Admitted', s=50)
    plt.xlabel('Exam 1 Score')
    plt.ylabel('Exam 2 Score')
    plt.legend(loc='upper right')
    # plt.show()
    return plt


# ============ Part 2: Compute Cost and Gradient ============
def sigmoid(z):
    # math.log()/math.exp() cannot operate on arrays directly, so use the
    # vectorized NumPy versions instead.
    # http://blog.csdn.net/u013634684/article/details/49305655
    g = 1 / (1 + np.exp(-z))
    return g


def costFunction(theta, X, y):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    s1 = np.log(sigmoid(np.dot(X, theta)))
    s2 = np.log(1 - sigmoid(np.dot(X, theta)))
    s = y * s1 + (1 - y) * s2
    J = -(np.sum(s)) / m
    return J


def Gradient(theta, X, y):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    y = y.reshape((m, 1))
    grad = ((X.T).dot(sigmoid(np.dot(X, theta)) - y)) / m
    return grad.flatten()


def mapFeature(x1, x2):
    # Map the two features to all polynomial terms x1^(i-j) * x2^j up to degree 6
    degree = 6
    out = np.ones(1)
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out = np.append(out, (x1 ** (i - j)) * (x2 ** j))
    return out


def plotDecisionBoundary(theta, X, y):
    f2 = plotData(X[:, 1:], y)
    m, n = X.shape
    if n <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        minVals = X[:, 1].min(0) - 2
        maxVals = X[:, 1].max(0) + 2
        plot_x = np.array([minVals, maxVals])
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])
        f2.plot(plot_x, plot_y, label='Decision Boundary', color='b')
        plt.show()
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((len(u), len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = mapFeature(u[i], v[j]).dot(theta)
        plt.contour(u, v, z.T, levels=[0])  # the boundary is the level set z = 0
        plt.show()


# ============== Part 4: Predict and Accuracies ==============
def predict(theta, X):
    # Predict y = 1 whenever the estimated probability is at least 0.5
    return sigmoid(X.dot(theta)) >= 0.5


if __name__ == '__main__':
    # ==================== Part 1: Loading and Visualizing Data ====================
    x, y, numberOfLines = loadData('ex2data1.txt')
    # plotData(x, y)

    # ============ Part 2: Compute Cost and Gradient ============
    # Related code: https://stackoverflow.com/questions/18801002/fminunc-alternate-in-numpy
    # Gradient descent: http://www.cnblogs.com/LeftNotEasy/archive/2010/12/05/mathmatic_in_machine_learning_1_regression_and_gradient_descent.html
    # Jacobian matrix: http://jacoxu.com/jacobian%E7%9F%A9%E9%98%B5%E5%92%8Chessian%E7%9F%A9%E9%98%B5/
    columnOne = np.ones((numberOfLines, 1))
    X = np.column_stack((columnOne, x))
    m, n = X.shape
    initialTheta = np.zeros(n)
    cost = costFunction(initialTheta, X, y)
    grad = Gradient(initialTheta, X, y)
    print('Cost at initial theta (zeros):\n', cost)
    print('Gradient at initial theta (zeros):\n', grad)
    print('\nProgram paused. Press enter to continue.\n')
    Result = scop.minimize(fun=costFunction, x0=initialTheta, args=(X, y), method='TNC', jac=Gradient)
    optimalTheta = Result.x
    # Print theta to screen
    print('Cost at theta found by scipy.optimize.minimize:', Result.fun)
    print('theta:\n', optimalTheta)
    # Plot Boundary
    plotDecisionBoundary(optimalTheta, X, y)
    prob = sigmoid(np.array([1, 45, 85]).dot(optimalTheta))
    print('For a student with scores 45 and 85, we predict an admission probability of', prob)
```
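
Independent of the data set, there is a quick sanity check for costFunction: with theta = zeros, sigmoid outputs 0.5 for every example, so the initial cost must equal -log(0.5) ≈ 0.693. A minimal check, assuming it is appended to the end of the __main__ block above so that X, y, and n are in scope:

```python
    # With theta = 0, h(x) = 0.5 for every row, so J = -log(0.5) on any data set
    assert np.isclose(costFunction(np.zeros(n), X, y), -np.log(0.5))
```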