KNN分类算法Python实现

来源:互联网 发布:c语言volatile的作用 编辑:程序博客网 时间:2024/06/04 23:03
# NOTE: the star import is kept so this module's namespace stays unchanged
# for existing users. Be aware that it can shadow builtins (e.g. ``sum``,
# ``max``, ``min``) with their NumPy counterparts, so the code below avoids
# relying on those builtins.
from numpy import *
import operator


def knn_classify(inputX, dataset, labels, k):
    """Classify ``inputX`` by majority vote among its ``k`` nearest samples.

    The distance between ``inputX`` and every row of ``dataset`` is the
    Euclidean distance. The labels of the ``k`` closest rows are counted and
    the most frequent one is returned.

    Args:
        inputX: Feature vector to classify; it must be broadcastable against
            the rows of ``dataset``.
        dataset: NumPy array holding one sample per row.
        labels: Sequence where ``labels[i]`` is the label of ``dataset[i]``.
        k: Number of nearest samples that take part in the vote.

    Returns:
        The label that received the most votes. On Python 3.7+ (ordered
        dicts) a tie goes to the label whose first vote came from the
        nearer sample, because ``sorted`` is stable.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            samples in ``dataset``.
    """
    # Broadcasting subtracts inputX from every row; no explicit tiling needed.
    diffMat = asarray(inputX) - dataset
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedIndicies = distances.argsort()

    voteCount = {}
    for i in range(k):
        voteLabel = labels[sortedIndicies[i]]
        voteCount[voteLabel] = voteCount.get(voteLabel, 0) + 1

    # dict.items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedVoteCount = sorted(
        voteCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedVoteCount[0][0]


def file2matrix(filename, num_features=3):
    """Load a tab-separated sample file into a feature matrix and label list.

    Every non-blank line describes one sample: tab-separated fields where the
    first ``num_features`` fields are numeric features and the last field is
    an integer label. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.
        num_features: Number of leading fields to store as features
            (defaults to 3, the previously hard-coded value).

    Returns:
        A tuple ``(dataset, labelVector)`` where ``dataset`` is a float array
        with one row per sample and ``num_features`` columns, and
        ``labelVector`` is the list of integer labels in file order.

    Raises:
        ValueError: If a feature or label field cannot be parsed as a number.
    """
    # The context manager guarantees the file is closed, even on errors.
    with open(filename) as sample_file:
        stripped = [line.strip() for line in sample_file]
    # Skip blank lines so they neither break parsing nor leave all-zero rows.
    rows = [row for row in stripped if row]

    dataset = zeros((len(rows), num_features))
    labelVector = []
    for index, row in enumerate(rows):
        sample = row.split('\t')
        dataset[index, :] = [float(value) for value in sample[:num_features]]
        labelVector.append(int(sample[-1]))
    return dataset, labelVector
0 0