林轩田-机器学习基石-作业1-python源码

来源：互联网　发布：闲鱼在淘宝哪里进去　编辑：程序博客网　时间：2024/05/19 19:42
以下代码是机器学习基石第一课（PLA）的课后编程15-20题的参考答案（python），直接复制即可运行

import sysimport numpy as npimport random as rdimport matplotlib.pylab as plt### learning raterate = 1def pla_error_rate(features, lables, parameter_vector):    length = len(features)    right = 0    wrong = 0    for i in range(length):        if lables[i][0]*(np.dot(features[i], parameter_vector)[0]) <= 0:            wrong += 1        else:            right += 1    return float(wrong)/float(length)def pla_pocket(features, lables, index_array, max_update_times, rate = 1):    w_pocket = np.zeros((5, 1))    w = np.zeros((5, 1))    sample_len = len(features)    flag = 1 ###algorithm halts flag, 1 for running    index = 0    count = 0    while (flag):        feature_index = index_array[index]        ### check if need update w        if lables[feature_index][0]*(np.dot(features[feature_index], w)[0]) <= 0:            ### update w:  w = w + yi*xi  b = b + yi            w = w + rate*lables[feature_index][0]*np.mat(features[feature_index]).T            count += 1            ### check if we need to update pocket            if pla_error_rate(features, lables, w) < pla_error_rate(features, lables, w_pocket):                w_pocket = w        if count >= max_update_times:            flag = 0        elif index >= sample_len - 1:            index = 0        else:            index += 1    return w_pocket,wdef pla_fix_index(features, lables, index_array, rate = 1):    w = np.zeros((5, 1))    sample_len = len(features)    flag = 1 ###algorithm halts flag, 1 for running    index = 0    right_items = 0  ### if right_items == feature len, algorithm halts    count = 0    while (flag):        feature_index = index_array[index]        ### check if need update w        if lables[feature_index][0]*(np.dot(features[feature_index], w)[0]) <= 0:            ### update w:  w = w + yi*xi  b = b + yi            w = w + rate*lables[feature_index][0]*np.mat(features[feature_index]).T            ### clean right items            right_items = 0            count += 1        else:            ### 
update             right_items += 1        if right_items >= sample_len:            flag = 0        elif index >= sample_len - 1:            index = 0        else:            index += 1    return count### perceptron learning algorithm, input featrues and lables,learning rate, return w,number of iterationsdef pla(features, lables, alpha = 1):    w = np.zeros((5, 1))    sample_len = len(features)    flag = 1 ###algorithm halts flag, 1 for running    index = 0    right_items = 0  ### if right_items == feature len, algorithm halts    count = 0    while (flag):        ### check if need update w        if lables[index][0]*(np.dot(features[index], w)[0]) <= 0:            ### update w:  w = w + yi*xi  b = b + yi            w = w + lables[index][0]*np.mat(features[index]).T            ### clean right items            right_items = 0            count += 1        else:            ### update             right_items += 1        if right_items >= sample_len:            flag = 0        elif index >= sample_len - 1:            index = 0        else:            index += 1    return count### import data from filedef load_data(file_path):    file_object = open(file_path)    try:        lines = file_object.readlines()    finally:        file_object.close()    sample_num = len(lines)    x = np.zeros((sample_num, 5))    y = np.zeros((sample_num, 1))    index = 0    for line in lines:        ### split feature and label        items = line.strip().split('\t')        x[index][1:5] = np.array([float(num) for num in items[0].strip().split()])[:]        x[index][0] = 1        y[index][0] = float(items[1])        index += 1    return x,yif __name__ == '__main__':    ### prolem 15    """    (X,Y) = load_data('data.txt')    print pla(X,Y,rate)    """    ### problem 16    """    (X,Y) = load_data('data.txt')    update_times_array = []    for i in range(200):        index_array = range(0,400)        rd.shuffle(index_array)        update_times = pla_fix_index(X, Y, index_array)        
update_times_array.append(update_times)    bins = range(0,110,10)    plt.hist(update_times_array, bins, rwidth=0.7, histtype='bar')    plt.show()    """    ### problem 17    """    (X,Y) = load_data('data.txt')    update_times_array2 = []    for i in range(2000):        index_array = range(0,400)        rd.shuffle(index_array)        update_times = pla_fix_index(X, Y, index_array, rate=0.1)        update_times_array2.append(update_times)    print "Average num: ", sum(update_times_array2)/(len(update_times_array2)*1.0)    bins = range(0,110,10)    plt.hist(update_times_array2, bins, rwidth=0.7, histtype='bar')    plt.show()    """    ### problem 18 and 19 and 20    (X,Y) = load_data('pocket_train_data.txt')    (X_test,Y_test) = load_data('pocket_test_data.txt')    error_rate_array = []    w = np.zeros((5, 1))    w_50 = np.zeros((5, 1))    for i in range(200):        index_array = range(0,len(X))        rd.shuffle(index_array)        ### train on the training set        #(w,w_50) = pla_pocket(X, Y, index_array, 50)        (w,w_100) = pla_pocket(X, Y, index_array, 100)        ### test on the test set        #error_rate_array.append(pla_error_rate(X_test, Y_test, w))        #error_rate_array.append(pla_error_rate(X_test, Y_test, w_50))        error_rate_array.append(pla_error_rate(X_test, Y_test, w))    print "average error rate on test set: ",sum(error_rate_array)/(len(error_rate_array)*1.0)    bins = np.arange(0,1,0.1)    plt.hist(error_rate_array, bins, rwidth=0.7, histtype='bar')    plt.show()
阅读全文
0 0