Python 实现 kNN(k 近邻)算法

来源:互联网 发布:苹果电脑 mac地址 编辑:程序博客网 时间:2024/05/17 09:07
import operator

import numpy as np


def createDataSet():
    """Return a tiny hard-coded training set for demonstrating kNN.

    Returns:
        A tuple ``(group, labels)`` where ``group`` is a 4x2 array of sample
        points and ``labels`` is the list of class labels, one per row.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0.0, 0.0], [0.0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify (sequence or 1-D array, one value per
            feature).
        dataSet: 2-D array of training samples, one row per sample.
        labels: Class labels; ``labels[i]`` belongs to ``dataSet[i]``.
        k: Number of nearest neighbours that vote.

    Returns:
        The label with the most votes. On a tie, the label that was seen
        first among the nearest neighbours wins.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        # The unchecked code failed with an opaque IndexError in these cases;
        # keep the exception type but say what went wrong.
        raise IndexError(
            'k must be between 1 and %d, got %r' % (dataSetSize, k))

    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = np.asarray(inX) - dataSet
    # Row-wise sum of squared differences, then square root -> Euclidean.
    distance = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistance = distance.argsort()

    # Count how often each label occurs among the k closest samples.
    classCount = {}
    for neighbourIndex in sortedDistance[:k]:
        voteLabel = labels[neighbourIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Turn {label: count} into [(label, count), ...] ordered by descending
    # count and take the label of the first entry.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Each non-blank line must hold at least three numeric feature columns; the
    last column of the line is parsed as an integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        ``(n, 3)`` float array built from the first three columns of each
        line and ``classLabelVector`` is the list of ``n`` integer labels.
    """
    # Read *all* lines (the file is closed as soon as the block exits).
    # Blank lines, e.g. a trailing newline at the end of the file, are
    # dropped so they do not break the numeric parsing below.
    with open(filename) as fr:
        strippedLines = [line.strip() for line in fr]
    dataLines = [line for line in strippedLines if line]

    returnMat = np.zeros((len(dataLines), 3))
    classLabelVector = []
    for index, line in enumerate(dataLines):
        listFromLine = line.split('\t')
        # The file holds text, so convert explicitly to numbers.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Min-max normalise every column of ``dataSet``.

    Each value is mapped to ``(value - column_min) / (column_max -
    column_min)``. A constant column (range 0) is mapped to all zeros instead
    of dividing by zero.

    Args:
        dataSet: 2-D array, one row per sample and one column per feature.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the normalised data, the
        per-column ``max - min`` (left at 0 for constant columns) and the
        per-column minimum.
    """
    dataSet = np.asarray(dataSet)
    minVals = dataSet.min(0)  # column-wise minimum -> one row of values
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide by 1 where the range is 0 so constant columns become 0, not NaN.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column min/range to every row.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet, ranges, minVals


def main():
    """Classify one sample point against the demo data set and print it."""
    group, labels = createDataSet()
    result = classify0([3, 0.2], group, labels, 3)
    print(result)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击