斯坦福大学机器学习作业题Problem Set #1 Regression for denoising quasar spectra 下篇

来源：互联网发布：深圳软件学校编辑：程序博客网时间：2024/06/05 10:17

（i）处理文件

# -*- coding: utf-8 -*-import numpy as npimport mathimport matplotlib.pyplot as pltimport csvdef read():    fr=open('quasar_test.csv','r')    arrayline=fr.readlines()    y=arrayline[1].strip().split(',')    m=len(arrayline)    n=len(y)    y2 = np.zeros((m - 1, n))    x = np.zeros((2, n))    x[0] = arrayline[0].strip().split(',')    x[1] = 1    y=map(lambda y: float(y), y)    y2[0] = weighted_linear_regression(x, y, 5)    q=[]    for i in range(len(y2[0])):        q.append(y2[0][i])    f1 = open('result1.csv', 'w')    f1.write(str(q))    f1.write('\n')    i=1    for line in arrayline[2:]:        y = line.strip().split(',')        y = map(lambda y: float(y), y)        y2[i]=weighted_linear_regression(x, y, 5)        q = []        for j in range(len(y2[0])):            q.append(y2[i][j])        print q        i=i+1        f1.write(str(q))        f1.write('\n')    return y2def weighted_linear_regression(x,y,t):#加权线性回归    y2=[]    for i in range(len(x[0])):        w=np.zeros((len(x[0]),len(x[0])))        for j in range(len(x[0])):            w[j][j]=math.exp((x[0][i] - x[0][j])*(x[0][i] - x[0][j]) / (-2 * t * t))        xwx=np.dot(np.dot(x,w),np.transpose(x))        xwx_inverse=np.linalg.inv(xwx)        xwx_inverse_x=np.dot(xwx_inverse,x)        xwx_inverse_x_w = np.dot(xwx_inverse_x,w)        xwx_inverse_x_w_y=np.dot(xwx_inverse_x_w,np.transpose(y))        theta=xwx_inverse_x_w_y        y2.append(x[0][i] * theta[0] + theta[1])    return y2y2=read()

（ii）

# -*- coding: utf-8 -*-import numpy as npimport heapqimport csvdef read():    fr=open('result.csv')    arrayline = fr.readlines()    x = arrayline[0].strip( ).split(',')    x=map(lambda x: float(x), x)    m=len(arrayline)    n=len(x)    y=np.zeros((m-1,n))    i=0    for line in arrayline[1:]:        num=line.strip().split(',')        num = map(lambda num: float(num), num)        y[i]=num        i=i+1    return x,yx,y=read()def predict(x,y,m):    dis=[]    for i in range(len(y)):        sum = 0        for j in range(150,450):            sum=sum+(y[m][j]-y[i][j])*(y[m][j]-y[i][j])        dis.append(sum)    smallest_four=heapq.nsmallest(4, dis)    smallest_three=smallest_four[1:]    location=[]    for i in range(len(smallest_three)):        location.append(dis.index(smallest_three[i]))    h=max(smallest_three)    f_left=[]    for i in range(50):        top=0        bottom=0        for j in range(len(smallest_three)):            n=location[j]            top = top + (1 - smallest_three[j]/ h) * y[n][i]            bottom = bottom + (1 -smallest_three[j]/ h)        sum=top / bottom        f_left.append(sum)    return f_leftdef error(y):    f_left_predict=[]    sum_error=[]    for m in range(len(y)):        error=0        f_left=predict(x, y, m)        for n in range(len(f_left)):            error=error+(f_left[n]-y[m][n])*(f_left[n]-y[m][n])        f_left_predict.append(f_left)        sum_error.append(error)    print sum(sum_error)/len(sum_error)error(y)

（iii）

# -*- coding: utf-8 -*-import numpy as npimport heapqimport matplotlib.pyplot as pltdef read1():       #读取数据    fr=open('result.csv')    arrayline = fr.readlines()    x = arrayline[0].strip( ).split(',')    x=map(lambda x: float(x), x)    m=len(arrayline)    n=len(x)    y=np.zeros((m-1,n))    i=0    for line in arrayline[1:]:        num=line.strip().split(',')        num = map(lambda num: float(num), num)        y[i]=num        i=i+1    return x,ydef read2():       #读取预测数据    fr=open('result1.csv')    arrayline = fr.readlines()    x= arrayline[0].strip( ).split(',')    x=map(lambda x: float(x), x)    m=len(arrayline)    n=len(x)    y=np.zeros((m-1,n))    i=0    for line in arrayline[1:]:        num=line.strip().split(',')        num = map(lambda num: float(num), num)        y[i]=num        i=i+1    return x,ydef predict(x,y,y1,m):  #y1为预测数据    dis=[]    for i in range(len(y)):        sum = 0        for j in range(150,450):            sum=sum+(y1[m][j]-y[i][j])*(y1[m][j]-y[i][j])        dis.append(sum)    smallest_three=heapq.nsmallest(3, dis)    location=[]    for i in range(len(smallest_three)):        location.append(dis.index(smallest_three[i]))    h=max(smallest_three)    f_left=[]    for i in range(50):        top=0        bottom=0        for j in range(len(smallest_three)):            n=location[j]            top = top + (1 - smallest_three[j]/ h) * y[n][i]            bottom = bottom + (1 -smallest_three[j]/ h)        sum=top / bottom        f_left.append(sum)    return f_leftdef error(x,y,y1):    f_left_predict=[]    sum_error=[]    for m in range(len(y1)):        f_left=predict(x,y,y1,m)        error = 0        for n in range(len(f_left)):            error=error+(f_left[n]-y1[m][n])*(f_left[n]-y1[m][n])        f_left_predict.append(f_left)        sum_error.append(error)    return f_left_predict[0],f_left_predict[5],sum(sum_error)/len(sum_error)def figure(x,example_1,example_6,y1):        #画图    plt.figure(1)    plt.xlabel('Wavelength')    plt.ylabel('Flux')    plt.scatter(x[0:50],example_1, marker='.', color='b', label='predict value', s=10)    plt.scatter(x[0:50], y1[0][0:50], marker='.', color='g', label='real value', s=10)    plt.legend(loc='upper right')    plt.figure(2)    plt.xlabel('Wavelength')    plt.ylabel('Flux')    plt.scatter(x[0:50], example_6, marker='.', color='b', label='predict value', s=10)    plt.scatter(x[0:50], y1[5][0:50], marker='.', color='g', label='real value', s=10)    plt.legend(loc='upper right')    plt.show()def main():    x,y=read1()    x1,y1=read2()    example_1, example_6, error1 = error(x,y, y1)    figure(x,example_1,example_6,y1)if __name__ == '__main__':    main()

阅读全文

0 0