机器学习实战 回归 (Machine Learning in Action — Regression, Chapter 8)

来源:互联网 发布:it是哪个国家 编辑:程序博客网 时间:2024/05/22 20:11
"""Regression algorithms from "Machine Learning in Action", chapter 8.

Implements ordinary least squares, locally weighted linear regression
(LWLR), ridge regression and forward stagewise regression, plus the
tab-separated data loader used by the book's examples.  The original
commented-out plotting/demo snippets were removed; running the module
as a script reproduces the stagewise-regression demo on the abalone
data set.
"""
import numpy as np

try:
    # Only needed by plotting demos; the algorithms below never use it.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None


def loadDataSet(filename):
    """Load a tab-separated data file; the last column is the target.

    Returns (dataMat, labelMat): a list of float feature rows and the
    list of float labels.
    """
    dataMat, labelMat = [], []
    # Fix: the original opened the file twice and closed neither handle.
    with open(filename) as fr:
        # Count features from the first row (every row has the same width).
        numFeat = len(fr.readline().split('\t')) - 1
        fr.seek(0)  # the first line is data, not a header - parse it too
        for line in fr:
            curLine = line.strip().split('\t')
            dataMat.append([float(tok) for tok in curLine[:numFeat]])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


def standRegres(xArr, yArr):
    """Ordinary least squares: ws = (X^T X)^-1 X^T y.

    Returns the (n, 1) weight matrix, or None after printing a warning
    when X^T X is singular (same bail-out behaviour as the original).
    """
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return None
    return xTx.I * (xMat.T * yMat)


def lwlr(testPoint, xArr, yArr, k=1.0):
    """Locally weighted linear regression prediction for one point.

    Each training sample j receives a Gaussian kernel weight
    exp(-|testPoint - x_j|^2 / (2 k^2)), so nearby samples dominate the
    local fit; smaller k means a more local (higher-variance) model.
    Returns the 1x1 matrix prediction, or None when the weighted normal
    matrix is singular.
    """
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    m = xMat.shape[0]
    weights = np.mat(np.eye(m))  # diagonal kernel-weight matrix
    for j in range(m):
        diffMat = testPoint - xMat[j, :]
        # Weight decays exponentially with squared distance from testPoint.
        weights[j, j] = np.exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if np.linalg.det(xTx) == 0.0:
        print('this matrix is singular,cannot do inverse')
        return None
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws


def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run lwlr() on every row of testArr; returns a 1-D array of predictions."""
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        yHat[i] = lwlr(testArr[i], xArr, yArr, k)
    return yHat


def rssError(yArr, yHatArr):
    """Residual sum of squares between targets and predictions (numpy arrays)."""
    return ((yArr - yHatArr) ** 2).sum()


def ridgeRegres(xMat, yMat, lam=0.2):
    """Ridge regression: ws = (X^T X + lam*I)^-1 X^T y.

    Expects numpy matrices (xMat: m x n, yMat: m x 1).  Returns None
    after printing a warning if the regularised matrix is singular
    (only possible when lam == 0).
    """
    xTx = xMat.T * xMat
    denom = xTx + np.eye(xMat.shape[1]) * lam
    if np.linalg.det(denom) == 0.0:
        print("this matrix is singular,cannot do inverse")
        return None
    return denom.I * (xMat.T * yMat)


def ridgeTest(xArr, yArr):
    """Trace ridge weights over 30 exponentially spaced lambdas.

    Standardises X (zero mean, divided by the per-column np.var) and
    centres y, then fits ridge regression with lam = exp(i - 10) for
    i in 0..29.  Returns a (30, n) array whose row i holds the weights
    for the i-th lambda.
    """
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    yMat = yMat - np.mean(yMat, 0)   # centre the target
    xMeans = np.mean(xMat, 0)
    xVar = np.var(xMat, 0)
    xMat = (xMat - xMeans) / xVar    # standardise the features
    numTestPts = 30
    wMat = np.zeros((numTestPts, xMat.shape[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat


def regularize(xMat):
    """Return a column-standardised copy of xMat (zero mean, divided by np.var)."""
    inMat = xMat.copy()
    inMeans = np.mean(inMat, 0)
    inVar = np.var(inMat, 0)
    return (inMat - inMeans) / inVar


def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Forward stagewise linear regression (greedy coordinate steps).

    Every iteration tries nudging each weight by +/-eps, keeps the one
    change that lowers the RSS most, and records the weight vector.
    Prints the current weights each iteration, like the book's version.
    Returns a (numIt, n) array of the weight history.
    """
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    yMat = yMat - np.mean(yMat, 0)   # centre y; X is standardised below
    xMat = regularize(xMat)
    m, n = np.shape(xMat)
    returnMat = np.zeros((numIt, n))
    ws = np.zeros((n, 1))
    for i in range(numIt):
        print(ws.T)
        lowestError = np.inf
        # Fix: original left wsMax unbound if no candidate step beat inf's
        # initial comparison chain on the very first pass.
        wsMax = ws
        for j in range(n):
            for sign in (-1, 1):
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                rssE = rssError(yMat.A, (xMat * wsTest).A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat


if __name__ == '__main__':
    # Stagewise-regression demo on the abalone data (path from the book);
    # guarded so importing this module no longer triggers file I/O.
    xArr, yArr = loadDataSet('C:/Users/xuwei/Desktop/机器学习/机器学习实战(pdf版+源码)/machinelearninginaction/Ch08/abalone.txt')
    print(stageWise(xArr, yArr, 0.001, 500))

原创粉丝点击