knn算法实例(python)

来源:互联网 发布:华为软件视频会议 编辑:程序博客网 时间:2024/06/07 12:27

参考地址(里面有解释和原数据)

import csv
import random
import math
import operator


def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a CSV file and randomly split its rows into train and test sets.

    Every column except the last is converted to ``float``; the last column
    is kept as the class label string. Blank rows (such as a trailing empty
    line) are skipped.

    Args:
        filename: Path of the CSV file to read.
        split: Probability, between 0 and 1, that a row is placed in the
            training set; otherwise it goes to the test set.
        trainingSet: Optional list that training rows are appended to.
        testSet: Optional list that test rows are appended to.

    Returns:
        The ``(trainingSet, testSet)`` pair. These are the lists passed in
        (mutated in place) or fresh lists when none were supplied.

    Raises:
        ValueError: If a feature column cannot be parsed as a float.
    """
    # Fresh lists per call: mutable defaults would be shared between calls.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # The csv module needs a text-mode file (binary mode fails on Python 3)
    # and recommends newline='' so embedded newlines are handled correctly.
    with open(filename, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue
            instance = [float(value) for value in row[:-1]] + [row[-1]]
            # Decide once per row so that a row lands in exactly one set.
            if random.random() < split:
                trainingSet.append(instance)
            else:
                testSet.append(instance)
    return trainingSet, testSet


def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance over the first ``length`` components.

    >>> euclideanDistance([0, 0, 'a'], [3, 4, 'b'], 2)
    5.0
    """
    squared = 0
    for index in range(length):
        squared += (instance1[index] - instance2[index]) ** 2
    return math.sqrt(squared)


def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    The last element of ``testInstance`` is treated as its label and is
    excluded from the distance computation. If fewer than ``k`` training
    rows exist, all of them are returned, nearest first.

    >>> rows = [[2, 2, 2, 'a'], [4, 4, 4, 'b'], [4.5, 4.5, 4.5, 'c']]
    >>> getNeighbors(rows, [5, 5, 5, '?'], 1)
    [[4.5, 4.5, 4.5, 'c']]
    """
    length = len(testInstance) - 1
    distances = [
        (candidate, euclideanDistance(testInstance, candidate, length))
        for candidate in trainingSet
    ]
    distances.sort(key=operator.itemgetter(1))
    # Slicing (rather than indexing 0..k-1) tolerates k > len(trainingSet).
    return [candidate for candidate, _ in distances[:max(k, 0)]]


def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    The label is the last element of each neighbor. On a tie, the label
    that appears first in ``neighbors`` wins.

    >>> getResponse([[1, 1, 1, 'a'], [2, 2, 2, 'a'], [3, 3, 3, 'b']])
    'a'

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1
    if not classVotes:
        raise IndexError('cannot vote with an empty list of neighbors')
    # max() returns the first maximal item, i.e. the earliest-seen label.
    return max(classVotes.items(), key=operator.itemgetter(1))[0]


def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label matches its prediction.

    ``predictions[i]`` is compared with the last element of ``testSet[i]``.
    An empty ``testSet`` yields ``0.0`` instead of dividing by zero.

    >>> getAccuracy([[1, 'a'], [2, 'b']], ['a', 'a'])
    50.0
    """
    if not testSet:
        return 0.0
    correct = 0
    for index, instance in enumerate(testSet):
        if instance[-1] == predictions[index]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0


def main(filename='f:/iris.csv', split=0.66, k=3):
    """Run the k-nearest-neighbours demo and print per-row results.

    Args:
        filename: CSV file whose last column is the class label.
        split: Probability that a row is used for training.
        k: Number of neighbours that vote on each prediction.
    """
    # Prepare data.
    trainingSet = []
    testSet = []
    loadDataset(filename, split, trainingSet, testSet)
    print("Train" + repr(len(trainingSet)))
    print("Test" + repr(len(testSet)))

    # Generate predictions.
    predictions = []
    for testInstance in testSet:
        neighbors = getNeighbors(trainingSet, testInstance, k)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> predicted=' + repr(result) + ', actual=' + repr(testInstance[-1]))
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: ' + repr(accuracy) + '%')


if __name__ == '__main__':
    main()
原创粉丝点击