SVM和Knn实现手写数字识别

来源：互联网 | 发布：佳明gps如何清除数据 | 编辑：程序博客网 | 时间：2024/04/28 12:30

数据和程序下载地址：手写数字识别

调用SVM库实现数字识别

# Standard scientific Python imports
import matplotlib.pyplot as plt
import numpy as np

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics

# The digits dataset bundled with scikit-learn
digits = datasets.load_digits()
print(digits.data)

# Alternative (left disabled): load the full MNIST dataset instead.
# NOTE: `fetch_mldata` has been removed from current scikit-learn releases;
# `datasets.fetch_openml` is its replacement.
# dataset = datasets.fetch_mldata("MNIST Original")
# Extract the features and labels
# features = np.array(dataset.data, 'int16')
# labels = np.array(dataset.target, 'int')
# print(labels)

[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ...,
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]

# Print the label vector, then display the second sample as an 8x8 array
# (a bare expression is echoed when this runs as a notebook cell).
print(digits.target)
digits.images[1]

[0 1 2 ..., 8 9 8]
array([[  0.,   0.,   0.,  12.,  13.,   5.,   0.,   0.],
       [  0.,   0.,   0.,  11.,  16.,   9.,   0.,   0.],
       [  0.,   0.,   3.,  15.,  16.,   6.,   0.,   0.],
       [  0.,   7.,  15.,  16.,  16.,   2.,   0.,   0.],
       [  0.,   0.,   1.,  16.,  16.,   3.,   0.,   0.],
       [  0.,   0.,   1.,  16.,  16.,   6.,   0.,   0.],
       [  0.,   0.,   1.,  16.,  16.,   6.,   0.,   0.],
       [  0.,   0.,   0.,  11.,  16.,  10.,   0.,   0.]])

# The data that we are interested in is made of 8x8 images of digits, let's
# have a look at the first 4 images, stored in the `images` attribute of the
# dataset.  If we were working from image files, we could load them using
# pylab.imread.  Note that each image must have the same size. For these
# images, we know which digit they represent: it is given in the 'target' of
# the dataset.
images_and_labels = list(zip(digits.images, digits.target))
for index, (image, label) in enumerate(images_and_labels[:4]):
    plt.subplot(2, 4, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)
plt.show()

# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
print('number of sample:%s' % n_samples)
data = digits.images.reshape((n_samples, -1))

# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)

# Split point between the training half and the test half.  Floor division
# keeps it an integer: in Python 3 `n_samples / 2` is a float, and NumPy
# does not accept floats as slice bounds.
half = n_samples // 2

# We learn the digits on the first half of the digits
classifier.fit(data[:half], digits.target[:half])

# Now predict the value of the digit on the second half:
expected = digits.target[half:]
predicted = classifier.predict(data[half:])

print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

# Show the first 4 test images with their predicted labels; they go in the
# second row (slots 5-8) of the 2x4 subplot grid.
images_and_predictions = list(zip(digits.images[half:], predicted))
for index, (image, prediction) in enumerate(images_and_predictions[:4]):
    plt.subplot(2, 4, index + 5)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Prediction: %i' % prediction)
plt.show()

这里写图片描述

number of sample:1797
Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       1.00      0.99      0.99        88
          1       0.99      0.97      0.98        91
          2       0.99      0.99      0.99        86
          3       0.98      0.87      0.92        91
          4       0.99      0.96      0.97        92
          5       0.95      0.97      0.96        91
          6       0.99      0.99      0.99        91
          7       0.96      0.99      0.97        89
          8       0.94      1.00      0.97        88
          9       0.93      0.98      0.95        92

avg / total       0.97      0.97      0.97       899


Confusion matrix:
[[87  0  0  0  1  0  0  0  0  0]
 [ 0 88  1  0  0  0  0  0  1  1]
 [ 0  0 85  1  0  0  0  0  0  0]
 [ 0  0  0 79  0  3  0  4  5  0]
 [ 0  0  0  0 88  0  0  0  0  4]
 [ 0  0  0  0  0 88  1  0  0  2]
 [ 0  1  0  0  0  0 90  0  0  0]
 [ 0  0  0  0  0  1  0 88  0  0]
 [ 0  0  0  0  0  0  0  0 88  0]
 [ 0  0  0  1  0  1  0  0  0 90]]
C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:25: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:28: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:29: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:35: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

这里写图片描述

编写Knn实现手写数字识别

from numpy import *
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: Feature vector to classify; it must broadcast against the rows
            of ``dataSet``.
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of class labels, aligned with the rows of
            ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest samples.
    """
    # Euclidean distance from inX to every training row; broadcasting
    # subtracts inX from each row without building a tiled copy.
    diffMat = inX - dataSet
    distances = ((diffMat ** 2).sum(axis=1)) ** 0.5
    sortedDistIndicies = distances.argsort()

    # Count the labels of the k closest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Highest vote count first; the winner is the first entry.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def img2vector(filename):
    """Read a 32x32 text image of digit characters into a (1, 1024) array.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row by row.

    Args:
        filename: Path of the text file to read.

    Returns:
        A float array of shape ``(1, 1024)``.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even if a line is malformed; the
    # evaluation below reads one file per sample.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def _label_from_filename(fileNameStr):
    """Return the integer before the first '_' in the file's base name."""
    fileStr = fileNameStr.split('.')[0]
    return int(fileStr.split('_')[0])


def handwritingClassTest():
    """Evaluate the kNN classifier (k=3) on the digit image directories.

    Every file in ``trainingDigits`` becomes one training row, labelled by
    the integer prefix of its file name.  Each file in ``testDigits`` is then
    classified, every prediction is printed, and the total error count and
    error rate are printed at the end.
    """
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(_label_from_filename(fileNameStr))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        classNumStr = _label_from_filename(fileNameStr)
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    print("\nthe total error rate is: %f" % (errorCount/float(mTest)))

# Run the kNN evaluation. handwritingClassTest() lists the 'trainingDigits'
# and 'testDigits' directories relative to the current working directory.
handwritingClassTest()

the classifier came back with: 0, the real answer is: 0
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 0, the real answer is: 0
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 1, the real answer is: 1
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 2, the real answer is: 2
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 3, the real answer is: 3
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 4, the real answer is: 4
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 5, the real answer is: 5
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 6, the real answer is: 6
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 7, the real answer is: 7
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 8, the real answer is: 8
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answer is: 9
the classifier came back with: 9, the real answer is: 9

the total number of errors is: 0

the total error rate is: 0.000000

0 0