<machine learning training> KNN

来源:互联网 发布:阿根廷生活知乎 编辑:程序博客网 时间:2024/06/06 02:07


func: open().readlines(), strip().split('\t'); zeros; index += 1

#func:add_subplot,scatter

func: min(0),tile

import operator

import numpy as np

# The interactive prompt is ``raw_input`` on Python 2 and ``input`` on
# Python 3; bind whichever one exists so classify() runs on both.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def createDataSet():
    """Return a tiny hand-made training set for trying out ``classify0``.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 float array of points
        and ``labels`` is the list of class names, one per row of ``group``.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its nearest neighbours.

    Distances are Euclidean and are measured from ``inX`` to every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (one value per column of ``dataSet``).
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: Sequence of class labels, ``labels[i]`` belonging to row ``i``.
        k: Number of neighbours that vote. Values larger than the number of
            training samples are clamped to that number.

    Returns:
        The label with the most votes. When several labels tie, the one that
        entered the vote table first wins (with insertion-ordered dicts that
        is the label of the nearer neighbour).

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    samples = np.asarray(dataSet, dtype=float)
    sample_count = samples.shape[0]
    if sample_count == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")
    neighbour_count = min(k, sample_count)

    # Broadcasting subtracts inX from every row, replacing the explicit tile().
    offsets = samples - np.asarray(inX, dtype=float)
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    nearest = distances.argsort()[:neighbour_count]

    votes = {}
    for row_index in nearest:
        vote_label = labels[row_index]
        votes[vote_label] = votes.get(vote_label, 0) + 1
    return max(votes.items(), key=operator.itemgetter(1))[0]


def filename(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Each non-blank line supplies its first three fields as float features and
    its last field as an integer class label. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabel)`` pair: ``returnMat`` is a float array of
        shape ``(rows, 3)`` and ``classLabel`` is the list of integer labels in
        file order.

    Raises:
        ValueError: If a line has fewer than three fields or a field cannot be
            converted to a number.
    """
    feature_rows = []
    classLabel = []
    # A single pass inside ``with`` replaces two unclosed open().readlines().
    with open(filename) as handle:
        for line_number, line in enumerate(handle, 1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            features = fields[0:3]
            if len(features) != 3:
                raise ValueError(
                    "line %d: expected at least 3 tab-separated fields, got %d"
                    % (line_number, len(fields))
                )
            classLabel.append(int(fields[-1]))
            feature_rows.append([float(value) for value in features])
    # reshape keeps the (0, 3) shape for a file without any data lines.
    returnMat = np.array(feature_rows, dtype=float).reshape(-1, 3)
    return returnMat, classLabel


def autonorm(data):
    """Rescale every column of ``data`` linearly onto the interval [0, 1].

    Args:
        data: 2-D array-like with one sample per row.

    Returns:
        A ``(norm, ranges, minimums)`` triple: the normalised array, the
        per-column divisor that was used and the per-column minimum, so that a
        new sample can be normalised with ``(x - minimums) / ranges``. A column
        whose values are all equal gets a divisor of 1.0 (instead of 0) so the
        normalised column is 0 rather than NaN.
    """
    values = np.asarray(data, dtype=float)
    minimums = values.min(0)
    ranges = values.max(0) - minimums
    # Avoid 0/0 for constant columns; their numerator is already zero.
    ranges = np.where(ranges == 0, 1.0, ranges)
    norm = (values - minimums) / ranges
    return norm, ranges, minimums


def test(path='dating.txt', holdout_ratio=0.1, k=3):
    """Estimate the classifier's error rate on a hold-out slice of a data file.

    The first ``holdout_ratio`` share of the normalised rows is classified
    against the remaining rows. Each prediction is printed next to the true
    label, followed by the overall error ratio.

    Args:
        path: Data file understood by ``filename``.
        holdout_ratio: Fraction of rows (taken from the top) used for testing.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The fraction of held-out rows that were misclassified.

    Raises:
        ValueError: If the split leaves no held-out rows or no training rows.
    """
    data, label = filename(path)
    norm, _ranges, _minimums = autonorm(data)
    m = norm.shape[0]
    n = int(holdout_ratio * m)
    if n < 1 or n >= m:
        raise ValueError(
            "cannot hold out %d of %d rows; need at least one test row and "
            "one training row" % (n, m)
        )

    train_data = norm[n:m, :]
    train_labels = label[n:m]
    error = 0
    for i in range(n):
        result = classify0(norm[i, :], train_data, train_labels, k)
        if result != label[i]:
            error += 1
        # NOTE(review): the original indentation was lost; this assumes the
        # prediction/label pair is reported for every held-out row - confirm.
        print("%s %s" % (result, label[i]))
    error_ratio = error / float(n)
    print("the error ratio is %f" % error_ratio)
    return error_ratio


# The name matches pytest's default "test*" pattern; mark the function so it
# is not collected as a unit test when this module is star-imported.
test.__test__ = False


def classify():
    """Prompt for three feature values and print the predicted category.

    The values are normalised with the statistics of ``dating.txt`` and
    classified against that file with ``k = 3``. The integer label returned
    by the classifier selects the printed description via ``label - 1``.
    """
    resultlist = ['not at all', 'in small doses', 'in large doses']
    a = float(_read_line("the first argument: "))
    b = float(_read_line("the second argument: "))
    c = float(_read_line("the third argument: "))
    x = np.array([a, b, c])
    data, label = filename('dating.txt')
    norm, ranges, mini = autonorm(data)
    result = classify0((x - mini) / ranges, norm, label, 3)
    print("you probably like this person: %s" % (resultlist[result - 1]))

阅读全文
0 0
原创粉丝点击