python语言编写KNN程序

来源:互联网 发布:2016淘宝双11成交额 编辑:程序博客网 时间:2024/06/03 14:40

Python版本为:3.2.3

代码为:

from numpy import*
import operator


#k近邻算法(kNN)
def classify0(inX,dataSet,labels,k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured from ``inX`` to every row of
    ``dataSet``.

    Args:
        inX: The sample to classify; one feature value per column of
            ``dataSet``.
        dataSet: Training samples, one sample per row (ndarray or nested
            sequence convertible by ``asarray``).
        labels: Class labels, indexed in parallel with the rows of
            ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows``.

    Returns:
        The label with the most votes among the ``k`` nearest rows. When
        several labels tie, the one that comes first in the vote dict's
        iteration order is returned (insertion order on Python 3.7+).

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of training rows.
    """
    samples = asarray(dataSet)
    sampleCount = samples.shape[0]  # number of training rows
    if not 1 <= k <= sampleCount:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (sampleCount, k))

    # Broadcasting subtracts inX from every row, so no tiled copy is needed.
    diffs = samples - asarray(inX)
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # Indices of the k closest training rows, nearest first.
    nearest = argsort(distances)[:k]

    classCount = {}
    for rowIndex in nearest:
        voteLabel = labels[rowIndex]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # Only the top entry is needed, so pick the maximum instead of sorting.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
#创建一些训练数据
def createDataSet():
    """Build the small four-point training set used by the demo.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array holding one
        two-feature sample per row, and ``labels`` is the list of class
        labels ('A' or 'B') in the same row order.
    """
    group = array([[1, 1.1], [1, 1], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
#测试
if __name__ == "__main__":
    # Demo: classify the point (0, 0) against the toy data set with k = 3.
    data, label = createDataSet()
    test_data = [0, 0]
    # Outside the interactive shell a bare expression is not echoed, so the
    # prediction is printed explicitly.
    print(classify0(test_data, data, label, 3))

将代码复制到 Python 的 IDLE 交互窗口时,不能把整段代码一次性粘贴进去:交互模式每次只接受一条完整的语句(例如一个完整的函数定义),因此需要逐段粘贴、逐段回车执行,否则有些语句无法运行。

实际粘贴运行过程为:

>>> from numpy import*
>>> import operator
>>> def classify0(inX,dataSet,labels,k):
        dataSetSize = dataSet.shape[0]  #数据矩阵的行
        diffMat = tile(inX,(dataSetSize,1)) - dataSet
        sqDiffMat = diffMat**2
        sqDistances = sum(sqDiffMat,axis=1)
        distances = sqDistances**0.5
        sortedDistIndicies = argsort(distances)
        classCount = {}
        for i in range(k):
            voteLabel = labels[sortedDistIndicies[i]]
            classCount[voteLabel] = classCount.get(voteLabel,0)+1
        sortedClassCount = sorted(classCount.items(),key = operator.itemgetter(1),reverse=True)
        return sortedClassCount[0][0]



>>> def createDataSet():
        group = array([[1,1.1],[1,1],[0,0],[0,0.1]])
        labels = ['A','A','B','B']
        return group,labels


>>> data,label = createDataSet()
>>> data
array([[ 1. ,  1.1],
       [ 1. ,  1. ],
       [ 0. ,  0. ],
       [ 0. ,  0.1]])
>>> label
['A', 'A', 'B', 'B']
>>> test_data = [0,0]
>>> classify0(test_data,data,label,3)
'B'


0 0