机器学习PCA实现

来源:互联网 发布:java nio 教程 编辑:程序博客网 时间:2024/06/04 19:29

代码链接:


# The star import is kept so that the bare NumPy names used below (and by any
# code that imports this module) stay in scope.
from numpy import *  # noqa: F401,F403


def loadDataSet(fileName, delim='\t'):
    """Read a delimited text file of numbers into a NumPy matrix.

    Args:
        fileName: Path of the text file; one sample per line.
        delim: Field separator used on each line (tab by default).

    Returns:
        A ``numpy.matrix`` with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    # ``with`` closes the handle even if parsing fails part-way through.
    with open(fileName) as fr:
        datArr = [
            [float(val) for val in line.strip().split(delim)]
            for line in fr
            if line.strip()  # skip blank lines instead of failing on float('')
        ]
    # ``asmatrix`` replaces the ``mat`` alias, which NumPy 2.0 removed.
    return asmatrix(datArr)


def pca(dataMat, topNfeat=9999999):
    """Project data onto its leading principal components.

    Args:
        dataMat: 2-D data, one sample per row and one feature per column.
        topNfeat: Number of principal components to keep. The default is
            large enough to keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``: the mean-centred data expressed
        in the retained components, and the data reconstructed from those
        components in the original feature space.
    """
    dataMat = asmatrix(dataMat)  # makes ``*`` below a matrix product
    meanVals = mean(dataMat, axis=0)  # per-column mean
    meanRemoved = dataMat - meanVals  # centre the data
    covMat = cov(meanRemoved, rowvar=0)  # columns are the variables
    # The covariance matrix is symmetric, so use ``eigh``: it returns real
    # eigenvalues and orthonormal eigenvectors, whereas the general ``eig``
    # may return complex values or non-orthogonal vectors for repeated
    # eigenvalues.
    eigVals, eigVects = linalg.eigh(asmatrix(covMat))
    # Indices of the ``topNfeat`` largest eigenvalues, largest first.
    eigValInd = argsort(eigVals)[::-1][:topNfeat]
    redEigVects = eigVects[:, eigValInd]  # retained eigenvectors as columns
    lowDDataMat = meanRemoved * redEigVects  # project into the new space
    reconMat = (lowDDataMat * redEigVects.T) + meanVals  # map back, re-add mean
    return lowDDataMat, reconMat


def replaceNanWithMean(fileName='secom.data', delim=' '):
    """Load a data set and replace NaN entries with their column mean.

    Args:
        fileName: Path of the data file (defaults to ``'secom.data'``).
        delim: Field separator used in the file (defaults to one space).

    Returns:
        The loaded ``numpy.matrix`` in which every NaN has been replaced by
        the mean of the non-NaN values of the same column.
    """
    datMat = loadDataSet(fileName, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # ``.A`` turns the column into a plain (n, 1) array; ``nonzero`` on
        # the boolean mask then yields the row indices in its first element.
        nanMask = isnan(datMat[:, i].A)
        validRows = nonzero(~nanMask)[0]
        nanRows = nonzero(nanMask)[0]
        meanVal = mean(datMat[validRows, i])  # mean of the non-NaN entries
        datMat[nanRows, i] = meanVal  # overwrite the NaN entries
    return datMat


原创粉丝点击