MLiA SVM心得
来源:互联网 发布:九阴绝学声望回收数据 编辑:程序博客网 时间:2024/05/16 05:44
svm部分的理论部分昨天看完的,也是有些地方似懂非懂,不过大体上了解了。
今天完成了代码部分运行。最后有个例子是手写测试的,之前KNN时的例子,现在拿出来比较SVM的效果。
文章首先介绍了没有使用核函数的SVM分类方法,我自己电脑运行实在是慢,就把代码贴上来。
SVM主要在于核函数的选择,还有sigma(σ,RBF核的宽度参数)的选择。其中有几个概念:内核,sigma,训练错误率,测试错误率,支持向量数。
支持向量机之所以称为“机”,是因为它会产生一个二值决策结果,即它是一种决策机。
核方法(核技巧)将数据从低维空间映射到高维空间。
下面就没什么说了,上代码了(明天开始看LDA,一个新的任务~。~):
# -*- coding: cp936 -*-__author__ = 'Administrator'from numpy import *import numpy as npimport datetimestartTime = datetime.datetime.now()def loadDataSet(filenName): dataMat = [] labelMat = [] fr = open(filenName) for line in fr.readlines(): lineArr = line.strip().split('\t') dataMat.append([float(lineArr[0]), float(lineArr[1])]) labelMat.append(float(lineArr[2])) return dataMat, labelMatdef selectJrand(i, m): j = i while j == i: j = int(random.uniform(0, m)) return jdef clipAlpha(aj, H, L): if aj > H: aj = H if L > aj: aj = L return ajdef smoSimple(dataMatIn, classLabels, C, toler, maxIter): dataMatrix = mat(dataMatIn) labelMat = mat(classLabels).transpose() b = 0 m, n = shape(dataMatrix) alphas = mat(zeros((m, 1))) iter = 0 while iter < maxIter: alphaPairsChanged = 0 for i in range(m): fXi = float(np.multiply(alphas, labelMat).T * (dataMatrix * dataMatrix[i, :].T)) + b Ei = fXi - float(labelMat[i]) if ((labelMat[i] * Ei < -toler) and (alphas[i] < C)) or ((labelMat[i] * Ei > toler) and (alphas[i] > 0)): j = selectJrand(i, m) fXj = float(np.multiply(alphas, labelMat).T *(dataMatrix * dataMatrix[j, :].T)) + b Ej = fXj - float(labelMat[j]) alphaIold = alphas[i].copy() alphaIold = alphas[j].copy() if labelMat[i] != labelMat[j]: L = max(0, alphas[j] - alphas[i]) H = min(C, C + alphas[j] - alphas[i]) else: L = max(0, alphas[j] + alphas[i] - C) H = min(C, alphas[j] + alphas[i]) if L == H: print 'L==H' continue eta = 2.0 * dataMatrix[i, :] * dataMatrix[j, :].T - dataMatrix[i, :] * dataMatrix[i, :].T - dataMatrix[j, :] * dataMatrix[j, :].T if eta >= 0: print 'eta>=o' continue alphas[j] -= labelMat[j] * (Ei - Ej) / eta alphas[j] = clipAlpha(alphas[j], H, L) if abs(alphas[j] - alphaIold) < 0.00001: print 'j not moving enough' continue alphas[i] += labelMat[j] * labelMat[i] * (alphaIold - alphas[j]) b1 = b - Ei - labelMat[i] * (alphas[i] - alphaIold) * dataMatrix[i, :] * dataMatrix[i, :].T - labelMat[j] * (alphas[j] - alphaIold) *dataMatrix[i, :] * dataMatrix[j, :].T b2 = b - 
Ej - labelMat[i] * (alphas[i] - alphaIold) * dataMatrix[i, :] * dataMatrix[j, :].T - labelMat[j] * (alphas[j] - alphaIold) * dataMatrix[j, :] * dataMatrix[j, :].T if (0 < alphas[i]) and (C > alphas[i]): b = b1 elif (0 < alphas[j]) and (C > alphas[j]): b = b2 else: b = (b1 + b2) / 2.0 alphaPairsChanged += 1 print 'iter: %d i:%d, pairs changed %d' % (iter, i, alphaPairsChanged) if alphaPairsChanged == 0: iter += 1 else: iter = 0 print 'iteration number; %d' % iter return b, alphas# dataArr, labelArr = loadDataSet('testSet.txt')# print dataArr# b, alphas = smoSimple(dataArr, labelArr, 0.6, 0.001, 40)# print b# print alphas# print shape(alphas[alphas > 0])def kernelTrans(X, A, kTup): #calc the kernel or transform data to a higher dimensional space m,n = shape(X) K = mat(zeros((m,1))) if kTup[0]=='lin': K = X * A.T #linear kernel elif kTup[0]=='rbf': for j in range(m): deltaRow = X[j,:] - A K[j] = deltaRow*deltaRow.T K = np.exp(K/(-1*kTup[1]**2)) #divide in NumPy is element-wise not matrix like Matlab else: raise NameError('Houston We Have a Problem -- \ That Kernel is not recognized') return K# class optStruct:# def __init__(self,dataMatIn, classLabels, C, toler): # Initialize the structure with the parameters# self.X = dataMatIn# self.labelMat = classLabels# self.C = C# self.tol = toler# self.m = shape(dataMatIn)[0]# self.alphas = mat(zeros((self.m,1)))# self.b = 0# self.eCache = mat(zeros((self.m,2))) #first column is valid flag# # self.K = mat(zeros((self.m,self.m)))# # for i in range(self.m):# # self.K[:,i] = kernelTrans(self.X, self.X[i,:], kTup)class optStruct: def __init__(self,dataMatIn, classLabels, C, toler,kTup): # Initialize the structure with the parameters self.X = dataMatIn self.labelMat = classLabels self.C = C self.tol = toler self.m = shape(dataMatIn)[0] self.alphas = mat(zeros((self.m,1))) self.b = 0 self.eCache = mat(zeros((self.m,2))) #first column is valid flag self.K = mat(zeros((self.m,self.m))) for i in range(self.m): self.K[:,i] = 
kernelTrans(self.X, self.X[i,:], kTup)# def calcEk(oS, k):# fXk = float(np.multiply(oS.alphas,oS.labelMat).T*(oS.X*oS.X[k,:].T)) + oS.b# Ek = fXk - float(oS.labelMat[k])# return Ekdef calcEk(oS, k): fXk = float(np.multiply(oS.alphas,oS.labelMat).T*oS.K[:,k] + oS.b) Ek = fXk - float(oS.labelMat[k]) return Ekdef selectJ(i, oS, Ei): #this is the second choice -heurstic, and calcs Ej maxK = -1; maxDeltaE = 0; Ej = 0 oS.eCache[i] = [1,Ei] #set valid #choose the alpha that gives the maximum delta E validEcacheList = nonzero(oS.eCache[:,0].A)[0] if (len(validEcacheList)) > 1: for k in validEcacheList: #loop through valid Ecache values and find the one that maximizes delta E if k == i: continue #don't calc for i, waste of time Ek = calcEk(oS, k) deltaE = abs(Ei - Ek) if (deltaE > maxDeltaE): maxK = k; maxDeltaE = deltaE; Ej = Ek return maxK, Ej else: #in this case (first time around) we don't have any valid eCache values j = selectJrand(i, oS.m) Ej = calcEk(oS, j) return j, Ejdef updateEk(oS, k):#after any alpha has changed update the new value in the cache Ek = calcEk(oS, k) oS.eCache[k] = [1,Ek]# def innerL(i, oS):# Ei = calcEk(oS, i)# if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)):# j,Ej = selectJ(i, oS, Ei) #this has been changed from selectJrand# alphaIold = oS.alphas[i].copy(); alphaJold = oS.alphas[j].copy();# if (oS.labelMat[i] != oS.labelMat[j]):# L = max(0, oS.alphas[j] - oS.alphas[i])# H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i])# else:# L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C)# H = min(oS.C, oS.alphas[j] + oS.alphas[i])# if L==H: print "L==H"; return 0# eta = 2.0 * oS.X[i,:] * oS.X[j,:].T - oS.X[i,:]*oS.X[i,:].T - oS.X[j,:]*oS.X[j,:].T #changed for kernel# if eta >= 0: print "eta>=0"; return 0# oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta# oS.alphas[j] = clipAlpha(oS.alphas[j],H,L)# updateEk(oS, j) #added this for the Ecache# if (abs(oS.alphas[j] - alphaJold) < 0.00001): print "j 
not moving enough"; return 0# oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])#update i by the same amount as j# updateEk(oS, i) #added this for the Ecache #the update is in the oppostie direction# b1 = oS.b - Ei- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[i,:].T - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[i,:]*oS.X[j,:].T# b2 = oS.b - Ej- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.X[i,:]*oS.X[j,:].T - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.X[j,:]*oS.X[j,:].T# if (0 < oS.alphas[i]) and (oS.C > oS.alphas[i]): oS.b = b1# elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]): oS.b = b2# else: oS.b = (b1 + b2)/2.0# return 1# else: return 0def innerL(i, oS): Ei = calcEk(oS, i) if ((oS.labelMat[i]*Ei < -oS.tol) and (oS.alphas[i] < oS.C)) or ((oS.labelMat[i]*Ei > oS.tol) and (oS.alphas[i] > 0)): j,Ej = selectJ(i, oS, Ei) #this has been changed from selectJrand alphaIold = oS.alphas[i].copy(); alphaJold = oS.alphas[j].copy(); if (oS.labelMat[i] != oS.labelMat[j]): L = max(0, oS.alphas[j] - oS.alphas[i]) H = min(oS.C, oS.C + oS.alphas[j] - oS.alphas[i]) else: L = max(0, oS.alphas[j] + oS.alphas[i] - oS.C) H = min(oS.C, oS.alphas[j] + oS.alphas[i]) if L==H: print "L==H"; return 0 eta = 2.0 * oS.K[i,j] - oS.K[i,i] - oS.K[j,j] #changed for kernel if eta >= 0: print "eta>=0"; return 0 oS.alphas[j] -= oS.labelMat[j]*(Ei - Ej)/eta oS.alphas[j] = clipAlpha(oS.alphas[j],H,L) updateEk(oS, j) #added this for the Ecache if (abs(oS.alphas[j] - alphaJold) < 0.00001): print "j not moving enough"; return 0 oS.alphas[i] += oS.labelMat[j]*oS.labelMat[i]*(alphaJold - oS.alphas[j])#update i by the same amount as j updateEk(oS, i) #added this for the Ecache #the update is in the oppostie direction b1 = oS.b - Ei- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,i] - oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[i,j] b2 = oS.b - Ej- oS.labelMat[i]*(oS.alphas[i]-alphaIold)*oS.K[i,j]- oS.labelMat[j]*(oS.alphas[j]-alphaJold)*oS.K[j,j] if (0 < oS.alphas[i]) and 
(oS.C > oS.alphas[i]): oS.b = b1 elif (0 < oS.alphas[j]) and (oS.C > oS.alphas[j]): oS.b = b2 else: oS.b = (b1 + b2)/2.0 return 1 else: return 0def smoP(dataMatIn, classLabels, C, toler, maxIter,kTup=('lin', 0)): #full Platt SMO # oS = optStruct(mat(dataMatIn),mat(classLabels).transpose(),C,toler) oS = optStruct(mat(dataMatIn),mat(classLabels).transpose(),C,toler, kTup)#改动 iter = 0 entireSet = True; alphaPairsChanged = 0 while (iter < maxIter) and ((alphaPairsChanged > 0) or (entireSet)): alphaPairsChanged = 0 if entireSet: #go over all for i in range(oS.m): alphaPairsChanged += innerL(i,oS) print "fullSet, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged) iter += 1 else:#go over non-bound (railed) alphas nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < C))[0] for i in nonBoundIs: alphaPairsChanged += innerL(i,oS) print "non-bound, iter: %d i:%d, pairs changed %d" % (iter,i,alphaPairsChanged) iter += 1 if entireSet: entireSet = False #toggle entire set loop elif (alphaPairsChanged == 0): entireSet = True print "iteration number: %d" % iter return oS.b,oS.alphasdef calcWs(alphas,dataArr,classLabels): X = mat(dataArr); labelMat = mat(classLabels).transpose() m,n = shape(X) w = zeros((n,1)) for i in range(m): w += np.multiply(alphas[i]*labelMat[i],X[i,:].T) return wdef testRbf(k1=1.3): dataArr,labelArr = loadDataSet('testSetRBF.txt') b,alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, ('rbf', k1)) #C=200 important datMat=mat(dataArr); labelMat = mat(labelArr).transpose() svInd=nonzero(alphas.A>0)[0] sVs=datMat[svInd] #get matrix of only support vectors labelSV = labelMat[svInd]; print "there are %d Support Vectors" % shape(sVs)[0] m,n = shape(datMat) errorCount = 0 for i in range(m): kernelEval = kernelTrans(sVs,datMat[i,:],('rbf', k1)) predict=kernelEval.T * np.multiply(labelSV,alphas[svInd]) + b if np.sign(predict)!=np.sign(labelArr[i]): errorCount += 1 print "the training error rate is: %f" % (float(errorCount)/m) dataArr,labelArr = 
loadDataSet('testSetRBF2.txt') errorCount = 0 datMat=mat(dataArr); labelMat = mat(labelArr).transpose() m,n = shape(datMat) for i in range(m): kernelEval = kernelTrans(sVs,datMat[i,:],('rbf', k1)) predict=kernelEval.T * np.multiply(labelSV,alphas[svInd]) + b if np.sign(predict)!=np.sign(labelArr[i]): errorCount += 1 print "the test error rate is: %f" % (float(errorCount)/m)# print testRbf()def img2vector(filename): returnVect = zeros((1,1024)) fr = open(filename) for i in range(32): lineStr = fr.readline() for j in range(32): returnVect[0,32*i+j] = int(lineStr[j]) return returnVectdef loadImages(dirName): from os import listdir hwLabels = [] trainingFileList = listdir(dirName) #load the training set m = len(trainingFileList) trainingMat = zeros((m,1024)) for i in range(m): fileNameStr = trainingFileList[i] fileStr = fileNameStr.split('.')[0] #take off .txt classNumStr = int(fileStr.split('_')[0]) if classNumStr == 9: hwLabels.append(-1) else: hwLabels.append(1) trainingMat[i,:] = img2vector('%s/%s' % (dirName, fileNameStr)) return trainingMat, hwLabelsdef testDigits(kTup=('rbf', 10)): dataArr,labelArr = loadImages('trainingDigits') b,alphas = smoP(dataArr, labelArr, 200, 0.0001, 10000, kTup) datMat=mat(dataArr); labelMat = mat(labelArr).transpose() svInd=nonzero(alphas.A>0)[0] sVs=datMat[svInd] labelSV = labelMat[svInd]; print "there are %d Support Vectors" % shape(sVs)[0] m,n = shape(datMat) errorCount = 0 for i in range(m): kernelEval = kernelTrans(sVs,datMat[i,:],kTup) predict=kernelEval.T * np.multiply(labelSV,alphas[svInd]) + b if np.sign(predict)!=np.sign(labelArr[i]): errorCount += 1 print "the training error rate is: %f" % (float(errorCount)/m) dataArr,labelArr = loadImages('testDigits') errorCount = 0 datMat=mat(dataArr); labelMat = mat(labelArr).transpose() m,n = shape(datMat) for i in range(m): kernelEval = kernelTrans(sVs,datMat[i,:],kTup) predict=kernelEval.T * np.multiply(labelSV,alphas[svInd]) + b if np.sign(predict)!=np.sign(labelArr[i]): errorCount 
+= 1 print "the test error rate is: %f" % (float(errorCount)/m)print testDigits(('rbf',20))# dataArr, labelArr = loadDataSet('testSet.txt')# b, alphas = smoP(dataArr, labelArr, 0.6, 0.001, 40)# ws=calcWs(alphas,dataArr,labelArr)# datMat=mat(dataArr)# print(datMat[0]*mat(ws)+b)# print labelArr[0]# print ws# # print dataArr# print b# print alphas# # print shape(alphas[alphas > 0])endTime = datetime.datetime.now()last = endTime - startTimeprint last
0 0
- MLiA SVM心得
- MLiA SVM(三层境界读后感)
- MLiA knn
- MLiA AdaBoost
- 学习SVM模型心得1
- eCognition SVM 分层分类心得
- MLiA ID3 DecisionTree
- MLiA 朴素贝叶斯
- MLiA Logistic回归
- 关于调试SVM的一点心得
- OpenCv3.0+SVM的使用心得(一)
- OpenCv3.0+SVM的使用心得(二)
- OpenCv3.0+SVM的使用心得(一)
- OpenCv3.0+SVM的使用心得(二)
- opencv3.1 svm(支持向量机)使用心得
- SVM
- SVM
- SVM
- NSBundle
- maven 那点事儿(eclipse)
- 使用批处理命令bat文件快速一键启动mysql
- Yii 2.0加载自定义类或命名空间
- FZU 1402 猪的安家 (中国剩余定理)
- MLiA SVM心得
- CentOS下删除或重命名乱码文件
- 《Learning Javascript Design Patterns》小结
- 深入浅出的讲解傅里叶变换(真正的通俗易懂)
- tcp4http
- Moya源码解析
- SFINAE
- GIT 03 -git add 和git commit 的过程图解
- leetcode 371.Sum of Two Integers