adaBoost算法学习笔记
来源:互联网 发布:微机原理接口编程题 编辑:程序博客网 时间:2024/06/07 02:26
本文参考自《机器学习实战》
其中 adaBoostTrainDS() 函数除了返回 weakClassArr 之外,还要同时返回 aggClassEst.T(即 return weakClassArr,aggClassEst.T),不然 ROC 曲线会画不出来
# coding=utf-8
"""AdaBoost built on single-level decision trees (decision stumps).

Study notes following the book "Machine Learning in Action".

All ``print`` calls pass exactly one argument so the emitted text is the same
under Python 2 and Python 3.
"""
# Retained so that code relying on this module re-exporting the numpy
# namespace keeps working.  The functions below always go through ``np`` so
# they do not depend on which builtins the star import shadows.
from numpy import *  # noqa: F401,F403
import numpy as np


def loadSimpData():
    """Return a tiny hard-coded two-feature data set for quick experiments.

    Returns:
        tuple: ``(dataMat, classLabels)`` where ``dataMat`` is a 5x2
        ``numpy.matrix`` of samples and ``classLabels`` is a list holding the
        matching +1.0 / -1.0 labels.
    """
    dataMat = np.matrix([[1.0, 2.1],
                         [2.0, 1.1],
                         [1.3, 1.0],
                         [1.0, 1.0],
                         [2.0, 1.0]])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat, classLabels


def loadDataSet(filename):
    """Load a tab-separated numeric file whose last column is the label.

    The number of columns is taken from the first line of the file.  Lines
    that are empty after stripping whitespace are skipped.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(dataMat, labelMat)`` -- a list of feature rows (lists of
        floats) and a list with one float label per row.

    Raises:
        ValueError: if a field cannot be converted to ``float``.
        IndexError: if a row has fewer fields than the first line.
    """
    with open(filename) as fr:
        lines = fr.readlines()

    # Column count comes from the raw first line; an empty file yields 1,
    # i.e. zero feature columns and no rows.
    numFeat = len(lines[0].split('\t')) if lines else 1

    dataMat = []
    labelMat = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank (e.g. trailing) line carries no sample.
            continue
        currline = stripped.split('\t')
        dataMat.append([float(currline[i]) for i in range(numFeat - 1)])
        labelMat.append(float(currline[-1]))
    return dataMat, labelMat


def stumpClassify(dataMAtrix, dimen, threshVal, threshIneq):
    """Classify every sample by thresholding one feature column.

    Args:
        dataMAtrix: two-dimensional samples, one row per sample.
        dimen: index of the feature column to compare.
        threshVal: threshold the feature value is compared against.
        threshIneq: ``'lt'`` labels samples with ``value <= threshVal`` as
            -1.0; any other value (``'gt'`` by convention) labels samples
            with ``value > threshVal`` as -1.0.

    Returns:
        numpy.ndarray: column of shape ``(m, 1)`` holding +1.0 or -1.0.
    """
    # Work on a plain 1-D view of the column so the boolean mask has the
    # same shape whether the caller passed a matrix or an ndarray.
    featureVals = np.asarray(dataMAtrix)[:, dimen]
    if threshIneq == 'lt':
        negative = featureVals <= threshVal
    else:
        negative = featureVals > threshVal

    retArray = np.ones((featureVals.shape[0], 1))
    retArray[negative, 0] = -1.0
    return retArray


def buildStump(dataArr, classLabels, D):
    """Find the decision stump with the lowest weighted error.

    Every feature is scanned with thresholds running from one step below
    the column minimum up to the column maximum (ten steps across the
    range), and both inequality directions are tried for each threshold.

    Args:
        dataArr: samples, one row per sample.
        classLabels: sequence of +1.0 / -1.0 labels, one per sample.
        D: ``(m, 1)`` column of sample weights.

    Returns:
        tuple: ``(bestStump, minError, bestClassEst)`` where ``bestStump`` is
        a dict with keys ``'dim'``, ``'thresh'`` and ``'ineq'``, ``minError``
        is the weighted error of that stump as a plain float, and
        ``bestClassEst`` is the ``(m, 1)`` prediction column of that stump.
    """
    dataMatrix = np.asmatrix(dataArr)
    labelMat = np.asmatrix(classLabels).T
    weightsT = np.asmatrix(D).T
    m, n = dataMatrix.shape
    numSteps = 10.0
    bestStump = {}
    bestClassEst = np.asmatrix(np.zeros((m, 1)))
    minError = np.inf

    for i in range(n):
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        # j == -1 puts the first threshold below every value in the column.
        for j in range(-1, int(numSteps) + 1):
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ('lt', 'gt'):
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # 1.0 where the prediction is wrong, 0.0 where it is right.
                errArr = np.asmatrix(
                    np.asarray(predictedVals != labelMat, dtype=float))
                # Extract the single element so a real float is compared
                # and returned instead of a 1x1 matrix.
                weightedError = float((weightsT * errArr)[0, 0])
                if weightedError < minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClassEst


def adaBoostTrainDS(dataArr, classLabels, numIt=40):
    """Train an AdaBoost ensemble of decision stumps.

    Prints the training error of the ensemble after every round and stops
    early once that error reaches zero.

    Args:
        dataArr: samples, one row per sample.
        classLabels: sequence of +1.0 / -1.0 labels, one per sample.
        numIt: maximum number of boosting rounds.

    Returns:
        tuple: ``(weakClassArr, aggClassEst)`` where ``weakClassArr`` is the
        list of stump dicts (keys ``'dim'``, ``'thresh'``, ``'ineq'``,
        ``'alpha'``) and ``aggClassEst`` is the ``1 x m`` matrix of
        accumulated, alpha-weighted predictions on the training samples
        (the row orientation is what ``plotROC`` is fed with).
    """
    weakClassArr = []
    m = np.shape(dataArr)[0]
    labelMat = np.asmatrix(classLabels).T
    D = np.asmatrix(np.ones((m, 1)) / m)  # start from uniform weights
    aggClassEst = np.asmatrix(np.zeros((m, 1)))

    for _ in range(numIt):
        bestStump, error, classEst = buildStump(dataArr, classLabels, D)
        # The 1e-16 floor avoids a division by zero for a perfect stump.
        alpha = float(0.5 * np.log((1.0 - error) / np.maximum(error, 1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)

        # Increase the weight of misclassified samples, decrease the weight
        # of correctly classified ones, then renormalise to sum to one.
        expon = np.multiply(-1 * alpha * labelMat, classEst)
        D = np.multiply(D, np.exp(expon))
        D = D / D.sum()

        aggClassEst += alpha * classEst
        aggErrors = np.multiply(np.sign(aggClassEst) != labelMat,
                                np.ones((m, 1)))
        errorRate = aggErrors.sum() / m
        print("total error: {} \n".format(errorRate))
        if errorRate == 0.0:
            break
    return weakClassArr, aggClassEst.T


def adaClassify(datToClass, classifierArr):
    """Classify samples with a trained list of decision stumps.

    Args:
        datToClass: samples to classify, one row per sample.
        classifierArr: list of stump dicts as produced by
            ``adaBoostTrainDS`` (keys ``'dim'``, ``'thresh'``, ``'ineq'``,
            ``'alpha'``).

    Returns:
        numpy.matrix: ``(m, 1)`` column holding the sign of the
        alpha-weighted vote for every sample.
    """
    dataMatrix = np.asmatrix(datToClass)
    m = dataMatrix.shape[0]
    aggClassEst = np.asmatrix(np.zeros((m, 1)))
    for stump in classifierArr:
        classEst = stumpClassify(dataMatrix, stump['dim'],
                                 stump['thresh'], stump['ineq'])
        aggClassEst += stump['alpha'] * classEst
    return np.sign(aggClassEst)


def plotROC(predStrengths, classLabels):
    """Draw the ROC curve for the given scores and print the area under it.

    Args:
        predStrengths: one prediction strength per sample.  Any array-like
            is accepted and flattened, so a ``1 x m`` row, an ``m x 1``
            column and a flat sequence all work.
        classLabels: sequence of labels; entries equal to 1.0 are treated as
            positives, everything else as negatives.

    Raises:
        ZeroDivisionError: if ``classLabels`` contains no positive or no
            negative sample.
    """
    import matplotlib.pyplot as plt

    cur = (1.0, 1.0)  # the curve is traced from the top-right corner
    ySum = 0.0        # sum of curve heights, one term per step along x
    numPosClas = np.sum(np.array(classLabels) == 1.0)
    yStep = 1 / float(numPosClas)
    xStep = 1 / float(len(classLabels) - numPosClas)
    # Flatten first: argsort on an (m, 1) column would sort each
    # one-element row and return only zeros.
    sortedIndicies = np.asarray(predStrengths, dtype=float).ravel().argsort()

    fig = plt.figure()
    fig.clf()
    ax = plt.subplot(111)
    for index in sortedIndicies.tolist():
        if classLabels[index] == 1.0:
            delX = 0
            delY = yStep
        else:
            delX = xStep
            delY = 0
            ySum += cur[1]
        ax.plot([cur[0], cur[0] - delX], [cur[1], cur[1] - delY], c='b')
        cur = (cur[0] - delX, cur[1] - delY)
    ax.plot([0, 1], [0, 1], 'b--')
    plt.xlabel('False Postive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve for AdaBoost Horse Colic Detection System')
    ax.axis([0, 1, 0, 1])
    plt.show()
    print("the Area under the Curve is : {}".format(ySum * xStep))


if __name__ == '__main__':
    # Train on the training file, count the misclassified test samples and
    # plot the ROC curve of the training predictions.
    dataArr, labelArr = loadDataSet('horseColicTraining2.txt')
    classifierArray, aggClassEst = adaBoostTrainDS(dataArr, labelArr, 10)
    testArr, testLabelArr = loadDataSet('horseColicTest2.txt')
    prediction10 = adaClassify(testArr, classifierArray)
    # One entry per test sample instead of a hard-coded row count.
    errArr = np.asmatrix(np.ones((len(testLabelArr), 1)))
    print(errArr[prediction10 != np.asmatrix(testLabelArr).T].sum())
    plotROC(aggClassEst, labelArr)
阅读全文
0 0
- Adaboost算法学习笔记
- adaBoost算法学习笔记
- AdaBoost元算法学习笔记
- 机器学习笔记之AdaBoost算法
- 提升方法AdaBoost算法学习笔记
- 提升方法AdaBoost算法学习笔记
- 【机器学习笔记之四】Adaboost 算法
- Adaboost算法阅读笔记
- AdaBoost算法笔记
- AdaBoost算法笔记
- AdaBoost算法学习
- 机器学习算法-Adaboost
- Adaboost算法学习
- 机器学习-AdaBoost算法
- 机器学习--AdaBoost算法
- 集成学习---AdaBoost算法
- 机器学习算法-Adaboost
- AdaBoost 学习笔记
- ural 1880. Psych Up's Eigenvalues 贪心
- Socket PRGM: chat_p2p
- 错误LinearLayoutManager is already attached to a RecyclerView
- 在broadcastReceiver 中弹出对话框
- oracle10G/11G官方下载地址集合 直接迅雷下载
- adaBoost算法学习笔记
- 文章标题
- SpringMVC学习之非注解的处理器映射器和适配器
- 14. Longest Common Prefix
- 用实例代码理解一下c++11”定义析构函数阻止合成移动”
- 幻境.最后一天
- 1、全志A33烧录固件(TF卡刷机)
- 我为什么要写博客
- Canvas 与 SVG 的比较