利用SVD简化数据

来源:互联网 发布:单片机高八位低八位 编辑:程序博客网 时间:2024/06/05 19:03

       奇异值分解(Singular Value Decomposition,SVD)是一种矩阵分解技术,它可以用小得多的数据集来表示原始数据。主要应用于:信息检索、推荐系统。

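     SVD 将原始数据集矩阵 Data 分解成三个矩阵 U、Σ 和 V^T:$Data_{m \times n} = U_{m \times m}\,\Sigma_{m \times n}\,V^{T}_{n \times n}$,其中 Σ 是对角矩阵,其对角线上的元素称为奇异值,按从大到小的顺序排列。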


   SVD 的代码实现:

from numpy import *
from numpy import linalg as la

# NOTE: ``from numpy import *`` shadows several builtins (sum, min, max, any,
# all, ...).  The code below deliberately avoids calling those builtins.


def loadExData():
    """Return a small 7x5 example rating matrix as a list of lists.

    Rows are users and columns are items (that is how the estimators in this
    module index the data); 0 means "not rated".
    """
    return [[0, 0, 0, 2, 2],
            [0, 0, 0, 3, 3],
            [0, 0, 0, 1, 1],
            [1, 1, 1, 0, 0],
            [2, 2, 2, 0, 0],
            [5, 5, 5, 0, 0],
            [1, 1, 1, 0, 0]]


def loadExData2():
    """Return a larger, sparser 11x11 example rating matrix (list of lists).

    Same layout as ``loadExData``: rows are users, columns are items and
    0 means "not rated".
    """
    return [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
            [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
            [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
            [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
            [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
            [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
            [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
            [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
            [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
            [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
            [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0]]


def ecludSim(inA, inB):
    """Return a Euclidean-distance similarity: ``1 / (1 + ||inA - inB||)``.

    The result is 1.0 for identical vectors and approaches 0 as the distance
    between them grows.
    """
    return 1.0 / (1.0 + la.norm(inA - inB))


def pearsSim(inA, inB):
    """Return the Pearson correlation of two columns rescaled to ``[0, 1]``.

    With fewer than 3 samples the correlation is not computed and 1.0 is
    returned instead.
    """
    if len(inA) < 3:
        return 1.0
    # rowvar=0: each column is a variable, each row an observation.
    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar=0)[0][1]


def cosSim(inA, inB):
    """Return the cosine similarity of two column matrices rescaled to ``[0, 1]``.

    ``inA`` and ``inB`` are expected to be column matrices of equal length so
    that ``inA.T * inB`` is a 1x1 matrix product.  When either vector has zero
    norm the cosine is undefined; 0.5 (the rescaled value of a cosine of 0) is
    returned instead of dividing by zero.
    """
    # Index the 1x1 product explicitly; float() on a 2-D array is deprecated.
    num = float((inA.T * inB)[0, 0])
    denom = la.norm(inA) * la.norm(inB)
    if denom == 0:
        return 0.5
    return 0.5 + 0.5 * (num / denom)


def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-item similarity.

    For every item ``j`` the user has rated, the similarity between column
    ``item`` and column ``j`` is computed over the rows where both columns are
    non-zero.  The estimate is the similarity-weighted average of the user's
    ratings.

    Args:
        dataMat: rating matrix (rows = users, columns = items, 0 = unrated);
            anything ``asmatrix`` accepts.
        user: row index of the user.
        simMeas: callable taking two column matrices, returning a similarity.
        item: column index of the item to estimate.

    Returns:
        The estimated rating, or 0 when the total similarity is 0.
    """
    dataMat = asmatrix(dataMat)
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            continue
        # Rows (users) that rated both `item` and `j`.
        overLap = nonzero(logical_and(dataMat[:, item].A > 0,
                                      dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item],
                                 dataMat[overLap, j])
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


# SVD-based rating estimation
def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

    The items (columns of ``dataMat``) are projected onto the first ``numSV``
    left singular vectors and scaled by the inverse singular values;
    similarities are then computed between the reduced item vectors.

    Args:
        dataMat: rating matrix (rows = users, columns = items, 0 = unrated);
            anything ``asmatrix`` accepts.
        user: row index of the user.
        simMeas: callable taking two column matrices, returning a similarity.
        item: column index of the item to estimate.
        numSV: number of singular values to keep (default 4, the value the
            original code hard-coded); clamped to the number available.

    Returns:
        The estimated rating, or 0 when the total similarity is 0.
    """
    dataMat = asmatrix(dataMat)
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, VT = la.svd(dataMat)
    # Builtin min() is shadowed by the star import, so clamp by hand.
    if numSV > len(Sigma):
        numSV = len(Sigma)
    # Arrange the leading singular values into a diagonal matrix.
    SigK = asmatrix(diag(Sigma[:numSV]))
    # Create transformed items: one row per item, numSV columns.
    xformedItems = dataMat.T * asmatrix(U)[:, :numSV] * SigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the top ``N`` recommendations for ``user``.

    Args:
        dataMat: rating matrix (rows = users, columns = items, 0 = unrated);
            anything ``asmatrix`` accepts.
        user: row index of the user.
        N: maximum number of recommendations to return.
        simMeas: similarity function passed through to ``estMethod``.
        estMethod: estimator called as ``estMethod(dataMat, user, simMeas, item)``.

    Returns:
        A list of ``(item, estimatedScore)`` tuples sorted by descending score,
        or the string ``'you rated everything'`` when the user has no unrated
        items.
    """
    dataMat = asmatrix(dataMat)
    # Column indices of the items this user has not rated.
    unratedItems = nonzero(dataMat[user, :].A == 0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in unratedItems]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]


def printMat(inMat, thresh=0.8):
    """Print ``inMat`` as rows of 0/1 digits.

    An entry is printed as 1 when it is greater than ``thresh`` and as 0
    otherwise.  The whole matrix is printed, one row per line.
    """
    numRows, numCols = shape(inMat)
    for i in range(numRows):
        print(' '.join('1' if float(inMat[i, k]) > thresh else '0'
                       for k in range(numCols)))


# SVD-based image compression
def imgCompress(numSV=3, thresh=0.8, fileName='0_5.txt'):
    """Reconstruct a 0/1 text image from its first ``numSV`` singular values.

    Reads ``fileName`` (each line must start with 32 digit characters),
    prints the original matrix, then prints the rank-``numSV`` reconstruction
    thresholded at ``thresh``.

    Args:
        numSV: number of singular values used for the reconstruction.
        thresh: threshold passed to ``printMat``.
        fileName: path of the text image (default ``'0_5.txt'``, the path the
            original code hard-coded).
    """
    myl = []
    # `with` guarantees the file handle is closed.
    with open(fileName) as imgFile:
        for line in imgFile:
            myl.append([int(line[i]) for i in range(32)])
    myMat = asmatrix(myl)
    print("****original matrix******")
    printMat(myMat, thresh)
    U, Sigma, VT = la.svd(myMat)
    # Construct the diagonal matrix from the leading singular values.
    SigRecon = asmatrix(diag(Sigma[:numSV]))
    reconMat = asmatrix(U)[:, :numSV] * SigRecon * asmatrix(VT)[:numSV, :]
    print("****reconstructed matrix using %d singular values******" % numSV)
    printMat(reconMat, thresh)

SVD 是一种强大的降维工具。我们可以利用 SVD 来逼近矩阵并从中提取重要特征;通过保留 80%~90% 的能量(即奇异值的平方和),就可以在保留重要特征的同时去掉噪声。SVD 已经运用到多个应用中,其中一个成功的案例就是推荐引擎。

0 0
原创粉丝点击