机器学习实战(一)——kNN算法

来源:互联网 发布:京东优惠券软件 编辑:程序博客网 时间:2024/06/06 09:10

最近选修了计算机学院的模式识别课,课上讲到了 Parzen 窗和 kNN 算法这两种对总体分布的非参数估计方法。之前我按照《机器学习实战》用 kNN 算法做过手写数字识别,今天打算继续用基于 Parzen 窗原理的 PNN(概率神经网络)算法写一个 Python 程序,比较一下两者的优劣。
PNN算法将是我写的第一个需要训练的机器学习程序。
2016.3.15

———分割线,以下为之前内容,kNN算法—————————————————————

kNN.py

# -*- coding: utf-8 -*-############KNN: the k nearest neighbours###########from numpy import *import operatorimport os###############new_input : a matrix with [1,1024]#dataset   : a matrix with [num,1024]#labels    : 0:9# k        : the k in knndef kNNClassify(new_input,dataset,labels,k):#cal the distance    num = dataset.shape[0]    subMatrix=tile(new_input,[num,1])    dist=subMatrix-dataset    dist=dist**2    #distance=dist.sum(axis=1)    distance=sum(dist,axis=1)    disttance=distance**0.5#sort    sortedDistIndice=argsort(distance)#cal the most    voteCount={}    for i in xrange(k):        voteIndice=labels[sortedDistIndice[i]]        #cal the votes        voteCount[voteIndice]=voteCount.get(voteIndice,0)+1    #max votecount    maxCount = 0    for key,value in voteCount.items():        if value > maxCount:            maxCount = value            maxIndex = key    return maxIndexdef img2Vector(filename):    rows = 32    cols = 32    imgVector = zeros([1,rows*cols])    fileIn = open(filename)    for row in xrange(rows):        lineStr = fileIn.readline()        for col in xrange(cols):            imgVector[0,row*cols+col] = int(lineStr[col])    return imgVectordef loadDataSet():    #getting training set    print 'getting training set'    rows = 32    cols = 32    dirTrain = './1/'    fileTrain = os.listdir('./1/')    train_x = zeros((len(fileTrain),rows*cols))    #labels = []    #zeros(1,len(filename))    train_y =[]    for i in xrange(len(fileTrain)):        dirFile=dirTrain+fileTrain[i]        train_x[i,:] = img2Vector(dirFile)        label = int(fileTrain[i].split('_')[0])        train_y.append(label)    print 'getting test set'    dirTest = './2/'    fileTest = os.listdir('./2/')    test_x=zeros((len(fileTest),rows*cols))    test_y=[]    for j in xrange(len(fileTest)):        dirFile=dirTest+fileTest[j]        test_x[j,:]=img2Vector(dirFile)        label = int(fileTest[j].split('_')[0])        test_y.append(label)    return train_x,train_y,test_x,test_ydef 
testHandWriting():    print 'loading data'    train_x,train_y,test_x,test_y =loadDataSet()    print 'traning'    pass    print 'testing'    numTestSamples = test_x.shape[0]    matchCount = 0    for i in xrange(numTestSamples):        predict = kNNClassify(test_x[i],train_x,train_y,3)        if predict == test_y[i]:            matchCount += 1    accuracy = float(matchCount)/numTestSamples    print 'show the result...\n'    print '%.2f%%' %(accuracy*100)

test_kNN.py

# test-kNN
# Driver script: runs the kNN handwritten-digit evaluation defined in kNN.py.
import kNN

# Guarded so that importing this module does not start the evaluation.
if __name__ == '__main__':
    kNN.testHandWriting()
0 0
原创粉丝点击