机器学习基石 作业1 程序题(15-20)

来源:互联网 发布:Java 记录日志 demo 编辑:程序博客网 时间:2024/05/19 16:36

import itertools
import random

import numpy as np


def read_data(path):
    """Load a tab-separated data file into a feature matrix and a label list.

    Each non-blank line is expected to look like ``"f1 f2 ... fk<TAB>label"``:
    whitespace-separated float features, a tab, then an integer label.

    Args:
        path: Path of the data file to read.

    Returns:
        A tuple ``(x_matrix, y_matrix)``. ``x_matrix`` is a NumPy array with
        one row per sample, where every row starts with a constant ``1``
        (bias term) followed by the parsed features. ``y_matrix`` is a plain
        list of integer labels.
    """
    x_matrix = []
    y_matrix = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(path) as data_file:
        for line in data_file:
            if not line.strip():
                # A blank (e.g. trailing) line carries no sample.
                continue
            features = line.split('\t')[0].split()
            x_matrix.append([1] + [float(value) for value in features])
            y_matrix.append(int(line.strip().split('\t')[1]))
    return np.array(x_matrix), y_matrix


def sign(x, w):
    """Return the perceptron prediction for sample ``x`` under weights ``w``.

    Args:
        x: 1-D feature vector (bias term included).
        w: Column vector of weights with shape ``(len(x), 1)``.

    Returns:
        ``-1`` when the inner product is less than or equal to zero,
        otherwise ``1``.
    """
    return -1 if np.dot(x, w)[0] <= 0 else 1


def _run_pla(x_matrix, y_matrix, order, eta=1):
    """Cycle through ``order`` applying PLA updates; return the update count.

    The loop stops once every sample in ``order`` has been classified
    correctly in a row. It does not terminate if no weight vector ever
    classifies all samples correctly.
    """
    n_samples = len(order)
    if n_samples == 0:
        # Nothing to learn from, hence no updates.
        return 0
    x_matrix = np.asarray(x_matrix, dtype=float)
    w = np.zeros((x_matrix.shape[1], 1))
    updates = 0
    correct_streak = 0
    for i in itertools.cycle(order):
        if sign(x_matrix[i], w) != y_matrix[i]:
            w += eta * y_matrix[i] * x_matrix[i].reshape(-1, 1)
            updates += 1
            correct_streak = 0
        else:
            correct_streak += 1
            if correct_streak == n_samples:
                break
    return updates


def naive_PLA(x_matrix, y_matrix):
    """Run PLA visiting the samples cyclically in their stored order.

    Args:
        x_matrix: Sample matrix, one row per sample (bias term included).
        y_matrix: Sequence of labels, each ``-1`` or ``1``.

    Returns:
        The number of weight updates performed before a full cycle of
        samples was classified without a mistake.
    """
    return _run_pla(x_matrix, y_matrix, range(len(x_matrix)))


def random_PLA(x_matrix, y_matrix):
    """Run PLA visiting the samples cyclically in one random permutation.

    The permutation is drawn once per call with ``random.sample`` and then
    reused for every pass.

    Args:
        x_matrix: Sample matrix, one row per sample (bias term included).
        y_matrix: Sequence of labels, each ``-1`` or ``1``.

    Returns:
        The number of weight updates performed before halting.
    """
    n_samples = len(x_matrix)
    order = random.sample(range(n_samples), n_samples)
    return _run_pla(x_matrix, y_matrix, order)


def weighted_random_PLA(x_matrix, y_matrix, eta):
    """Run random-order PLA with every update scaled by ``eta``.

    Args:
        x_matrix: Sample matrix, one row per sample (bias term included).
        y_matrix: Sequence of labels, each ``-1`` or ``1``.
        eta: Factor applied to each correction ``y * x``.

    Returns:
        The number of weight updates performed before halting.
    """
    n_samples = len(x_matrix)
    order = random.sample(range(n_samples), n_samples)
    return _run_pla(x_matrix, y_matrix, order, eta)


if __name__ == '__main__':
    x_matrix, y_matrix = read_data('ntumlone_hw1_hw1_15_train.dat')
    repeats = 2000

    # Question 15: print(naive_PLA(x_matrix, y_matrix))
    # Question 16: average random_PLA(x_matrix, y_matrix) over `repeats` runs.

    # Question 17: average update count of random-order PLA with eta = 0.5.
    total_updates = 0
    for _ in range(repeats):
        total_updates += weighted_random_PLA(x_matrix, y_matrix, 0.5)
    print(total_updates / repeats)

import copy
import random

import numpy as np


def read_data(path):
    """Load a tab-separated data file into a feature matrix and a label list.

    Each non-blank line is expected to look like ``"f1 f2 ... fk<TAB>label"``:
    whitespace-separated float features, a tab, then an integer label.

    Args:
        path: Path of the data file to read.

    Returns:
        A tuple ``(x_matrix, y_matrix)``. ``x_matrix`` is a NumPy array with
        one row per sample, where every row starts with a constant ``1``
        (bias term) followed by the parsed features. ``y_matrix`` is a plain
        list of integer labels.
    """
    x_matrix = []
    y_matrix = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(path) as data_file:
        for line in data_file:
            if not line.strip():
                # A blank (e.g. trailing) line carries no sample.
                continue
            features = line.split('\t')[0].split()
            x_matrix.append([1] + [float(value) for value in features])
            y_matrix.append(int(line.strip().split('\t')[1]))
    return np.array(x_matrix), y_matrix


def sign(x, w):
    """Return the perceptron prediction for sample ``x`` under weights ``w``.

    Args:
        x: 1-D feature vector (bias term included).
        w: Column vector of weights with shape ``(len(x), 1)``.

    Returns:
        ``-1`` when the inner product is less than or equal to zero,
        otherwise ``1``.
    """
    return -1 if np.dot(x, w)[0] <= 0 else 1


def test(w, x_matrix, y_matrix, sum):
    """Count how many of the first ``sum`` samples ``w`` misclassifies.

    Args:
        w: Column vector of weights.
        x_matrix: Sample matrix, one row per sample (bias term included).
        y_matrix: Sequence of labels.
        sum: Number of leading samples to evaluate. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The number of samples whose prediction differs from its label.
    """
    mistakes = 0
    for i in range(sum):
        if sign(x_matrix[i], w) != y_matrix[i]:
            mistakes += 1
    return mistakes


# The name starts with "test", so stop pytest from collecting this helper
# as a test case when the module is imported into a test file.
test.__test__ = False


def _zero_weights(x_matrix):
    """Return an all-zero column weight vector sized to ``x_matrix`` rows."""
    n_features = x_matrix.shape[1] if x_matrix.ndim == 2 else 0
    return np.zeros((n_features, 1))


def random_pocket(x_matrix, y_matrix, updates):
    """Run the pocket algorithm for at most ``updates`` PLA corrections.

    Samples are visited cyclically in one random permutation drawn per call.
    After each correction the new weights are scored on the whole training
    set, and the weights with the fewest mistakes so far are kept.

    Args:
        x_matrix: Training samples, one row per sample (bias term included).
        y_matrix: Sequence of labels, each ``-1`` or ``1``.
        updates: Maximum number of corrections to perform.

    Returns:
        The best weights seen, as an array of shape ``(n_features, 1)``.
        This is the zero vector if no correction was ever made.
    """
    x_matrix = np.asarray(x_matrix, dtype=float)
    n_samples = len(x_matrix)
    order = random.sample(range(n_samples), n_samples)
    w = _zero_weights(x_matrix)
    best_w = _zero_weights(x_matrix)
    # One more than any reachable mistake count, so the first corrected
    # weight vector is always accepted regardless of the data set size.
    best_count = n_samples + 1
    done = 0
    while done < updates:
        corrected = False
        for i in order:
            if sign(x_matrix[i], w) != y_matrix[i]:
                corrected = True
                done += 1
                # Build a new array so best_w never aliases the working w.
                w = w + y_matrix[i] * x_matrix[i].reshape(-1, 1)
                count = test(w, x_matrix, y_matrix, n_samples)
                if count < best_count:
                    best_count = count
                    best_w = w
                if done == updates:
                    break
        if not corrected:
            # A mistake-free pass means no further correction can happen;
            # without this exit the loop would spin forever.
            break
    return best_w


def random_PLA(x_matrix, y_matrix, updates=50):
    """Run random-order PLA and return the weights after ``updates`` updates.

    Samples are visited cyclically in one random permutation drawn per call.

    Args:
        x_matrix: Training samples, one row per sample (bias term included).
        y_matrix: Sequence of labels, each ``-1`` or ``1``.
        updates: Number of corrections after which to stop (default 50).

    Returns:
        The weights after the last correction, shape ``(n_features, 1)``.
        Fewer than ``updates`` corrections are made if a full pass finds no
        mistake first.
    """
    x_matrix = np.asarray(x_matrix, dtype=float)
    n_samples = len(x_matrix)
    order = random.sample(range(n_samples), n_samples)
    w = _zero_weights(x_matrix)
    done = 0
    while done < updates:
        corrected = False
        for i in order:
            if sign(x_matrix[i], w) != y_matrix[i]:
                corrected = True
                w += y_matrix[i] * x_matrix[i].reshape(-1, 1)
                done += 1
                if done == updates:
                    break
        if not corrected:
            # Converged early: nothing left to correct.
            break
    return w


if __name__ == '__main__':
    x_matrix, y_matrix = read_data('train.txt')
    x_test, y_test = read_data('test.txt')
    n_test = len(x_test)
    repeats = 2000
    error = 0

    # Question 18: use random_pocket(x_matrix, y_matrix, 50) below.
    # Question 19: use random_PLA(x_matrix, y_matrix) below.

    # Question 20: average test error of the pocket weights, 100 updates.
    for i in range(repeats):
        print(i)
        w = random_pocket(x_matrix, y_matrix, 100)
        error += test(w, x_test, y_test, n_test)
    print(float(error) / n_test / repeats)


阅读全文
0 0