Machine Learning 笔记之KNN算法

来源:互联网 发布:淘宝刻印落款加钱 编辑:程序博客网 时间:2024/06/06 01:21
# coding=utf8
"""Small nearest-neighbour style classifier working on plain Python lists.

Training samples are stored as ``[features, label]`` pairs.  A query point is
assigned to the class whose ``k`` closest samples have the smallest mean
Euclidean distance to the query.
"""
import math


class trainSet:
    """Labelled training samples.

    Attributes:
        Data: list of ``[features, label]`` pairs, one per entry of ``clas``,
            in input order.
        clas: the distinct labels, in order of first appearance.
    """

    def __init__(self, data, clas):
        """Pair each label in ``clas`` with the sample at the same position.

        Args:
            data: indexable collection of feature vectors.
            clas: labels, one per sample; labels must be hashable.
        """
        self.Data = [[data[i], label] for i, label in enumerate(clas)]

        # De-duplicate while keeping first-appearance order so that the class
        # order (and therefore tie-breaking in KNN) is deterministic instead
        # of depending on set iteration order.
        seen = set()
        self.clas = []
        for label in clas:
            if label not in seen:
                seen.add(label)
                self.clas.append(label)


class KNN:
    """Classify one query point against a ``trainSet``.

    For every class, the ``k`` smallest Euclidean distances between the query
    and that class's samples are averaged (all samples are used when the class
    has fewer than ``k``).  The class with the smallest average wins; ties go
    to the class that appears first in ``trainSet.clas``.

    Note that this is a per-class mean-distance rule, not a majority vote
    among the ``k`` globally nearest samples.

    Attributes:
        trainSet: the training set passed in.
        clas: the training set's list of distinct labels.
        k: number of closest samples considered per class.
        data: the query feature vector.
        result: the predicted label, computed at construction time.
    """

    def __init__(self, trainSet, data, k):
        """Store the inputs and compute ``result``.

        Raises:
            ValueError: if ``k`` is smaller than 1 or the training set is
                empty.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))
        self.trainSet = trainSet
        self.clas = trainSet.clas
        self.k = k
        self.data = data
        self.result = self.__getResult()

    def __getResult(self):
        """Return the label whose k closest samples are nearest on average."""
        if not self.trainSet.Data:
            raise ValueError("training set is empty")

        # Map each label to its position in self.clas once, instead of
        # scanning the label list for every training sample.
        slot = dict((label, i) for i, label in enumerate(self.clas))
        per_class = [[] for _ in self.clas]
        for sample, label in self.trainSet.Data:
            per_class[slot[label]].append(
                self.__calDistance(sample, self.data))

        means = []
        for distances in per_class:
            nearest = sorted(distances)[:self.k]
            if nearest:
                means.append(sum(nearest) / float(len(nearest)))
            else:
                # A class without samples can never be the closest one.
                means.append(float("inf"))
        return self.clas[self.__findMin(means)]

    def __calDistance(self, data1, data2):
        """Return the Euclidean distance over the coordinates of ``data1``."""
        total = 0
        for i, value in enumerate(data1):
            total += (value - data2[i]) ** 2
        return math.sqrt(total)

    def __findMin(self, kdistance):
        """Return the index of the first smallest value in ``kdistance``."""
        # A real arg-min: no magic upper bound, so distances of any magnitude
        # are compared correctly.
        return min(range(len(kdistance)), key=kdistance.__getitem__)


data = [[1, 1], [1, 1.1], [2, 2], [2, 2.5]]
cls = ['A', 'A', 'B', 'B']
train = trainSet(data, cls)
inputData = [1.6, 1.2]
knn1 = KNN(train, inputData, 2)
# %-formatting prints the same text under Python 2 and Python 3.
print("The input is :  %s" % (inputData,))
print("The resullt is :  %s" % (knn1.result,))