Bagging、DT和RF
来源:互联网 发布:淘宝other是什么牌子 编辑:程序博客网 时间:2024/06/06 03:48
下面是我总结的Bagging、DT和RF相关内容
决策树(decision tree)代码
# -*- coding: utf-8 -*-
"""
Regression decision tree (CART-style) for the UCI abalone dataset.

Created on Fri Jan 20 10:42:35 2017
@author: Administrator
"""


def LoadData():
    """Load the abalone data file into a list of 10-element rows.

    Columns 0-1 one-hot encode sex (F -> col 0 set, M -> col 1 set,
    I -> both zero), columns 2-7 are the six measurements Length through
    Viscera weight, column 8 is the integer ring count, and column 9 —
    the last column, used as the regression target by the tree code —
    is the shell weight.

    NOTE(review): the 8/9 ordering reproduces the original code: rings is
    a *feature* and shell weight is the *target*. Usually rings is the
    abalone target — confirm this is intentional.
    """
    # `with` guarantees the handle is closed (the original leaked it).
    with open('./data/abalone/data/abalone.data') as fid:
        lines = fid.readlines()
    dataset = [[0] * 10 for _ in range(len(lines))]
    for i, line in enumerate(lines):
        # split+strip replaces the original chain of index(',')/slicing and
        # survives a final line without a trailing newline.
        fields = line.strip().split(',')
        if fields[0] == 'F':
            dataset[i][0] = 1
        elif fields[0] == 'M':
            dataset[i][1] = 1
        for j in range(6):                    # Length .. Viscera weight
            dataset[i][2 + j] = float(fields[1 + j])
        dataset[i][8] = int(fields[8])        # Rings
        dataset[i][9] = float(fields[7])      # Shell weight (target)
    return dataset


def chooseBestFeatureToSplit(dataset):
    """Find the (feature, threshold) split minimizing total squared error.

    For each candidate threshold the rows are partitioned into
    feature < threshold (left) and >= threshold (right); the score is the
    summed within-partition sum of squared deviations,
    SSE = sum(y^2) - (sum(y))^2 / N, for both sides.

    Returns (best_feature_index, best_threshold, left_rows, right_rows).
    """
    best_score = float('Inf')
    best_feature = 0
    best_value = 0
    # Initialized so the return is always bound even if no split runs.
    best_left, best_right = [], []
    for feat in range(len(dataset[0]) - 1):
        values = sorted(set(row[feat] for row in dataset))
        if len(values) == 1:
            continue                  # constant feature: nothing to split on
        for val in values[1:]:        # skip the minimum: empty left side
            n1 = n2 = 0
            sum1 = sum2 = 0.0         # sum of targets per side
            sq1 = sq2 = 0.0           # sum of squared targets per side
            left, right = [], []
            for row in dataset:
                if row[feat] < val:
                    n1 += 1
                    sum1 += row[-1]
                    sq1 += row[-1] ** 2
                    left.append(row)
                else:
                    n2 += 1
                    sum2 += row[-1]
                    sq2 += row[-1] ** 2
                    right.append(row)
            # BUGFIX: the original computed sq - sum**2 * n; the sum of
            # squared deviations is sq - sum**2 / n. Both n1, n2 >= 1 here
            # because val is strictly above the feature minimum and occurs
            # in the data.
            score = sq1 - sum1 ** 2 / n1 + sq2 - sum2 ** 2 / n2
            if score < best_score:
                best_score = score
                best_feature = feat
                best_value = val
                best_left, best_right = left, right
    return best_feature, best_value, best_left, best_right


def Evaluation(classList):
    """Leaf prediction: the mean of the target values reaching the node."""
    return sum(classList) / len(classList)


def classify(inputTree, data):
    """Descend the tree dict and return the leaf prediction for ``data``.

    A node has the form {feature_index: {0: left_subtree,
    threshold: right_subtree}}; relies on dict insertion order
    (Python 3.7+) so the second key is the split threshold.
    """
    feature = list(inputTree.keys())[0]
    threshold = list(inputTree[feature].keys())[1]
    branch = 0 if data[feature] < threshold else threshold
    subtree = inputTree[feature][branch]
    if isinstance(subtree, dict):
        return classify(subtree, data)
    return subtree


def MyCreateTree(dataset, depth):
    """Recursively build a regression tree with at most ``depth`` more splits.

    Recursion stops when all targets are equal (pure leaf), when every
    feature vector is identical, or when the depth budget is exhausted;
    non-pure leaves return the mean target (``Evaluation``).
    """
    classList = [row[-1] for row in dataset]
    if classList.count(classList[0]) == len(classList):
        return classList[0]                      # pure node
    # All rows share one feature vector -> nothing left to split on.
    if all(dataset[0][:-1] == row[:-1] for row in dataset[1:]) or depth == 0:
        return Evaluation(classList)
    bestFeat, bestVal, left, right = chooseBestFeatureToSplit(dataset)
    # 0 keys the '< threshold' branch; bestVal keys the '>=' branch.
    return {bestFeat: {0: MyCreateTree(left, depth - 1),
                       bestVal: MyCreateTree(right, depth - 1)}}
随机森林(Random Forest)代码
# -*- coding: utf-8 -*-
"""
Random forest built on the DT regression-tree module.

Created on Fri Jan 20 16:13:14 2017
@author: Administrator
"""
import random as rd
import time

import DT


def Bootstrap(dataset, N, SeletedFeatures):
    """Draw N rows with replacement, projected onto ``SeletedFeatures``.

    Each sampled row keeps only the chosen feature columns, with the
    target (last column of the original row) appended at the end.
    """
    # NOTE(review): reseeding from the clock on every call weakens the
    # randomness (rapid calls may repeat a seed); kept from the original.
    rd.seed(time.time())
    sampled = []
    for _ in range(N):
        row = dataset[rd.randint(0, len(dataset) - 1)]
        sampled.append([row[j] for j in SeletedFeatures] + [row[-1]])
    return sampled


def myRF(FeatureN, TreeN, dataset, MaxDepth):
    """Train ``TreeN`` depth-limited trees, each on a bootstrap sample over
    ``FeatureN`` randomly chosen feature columns.

    Returns (feature_subsets, trees), the pair expected by ``RFClassify``.
    """
    # BUGFIX: the original sampled from the hard-coded range(10), which
    # includes index 9 -- the target column -- leaking the label into the
    # features. Sample only from the actual feature columns.
    n_features = len(dataset[0]) - 1
    feature_sets = []
    trees = []
    for _ in range(TreeN):
        subset = rd.sample(range(n_features), FeatureN)
        feature_sets.append(subset)
        trees.append(DT.MyCreateTree(
            Bootstrap(dataset, len(dataset), subset), MaxDepth))
    return feature_sets, trees


def RFClassify(SeletedFeaturesSet, TreesSet, data):
    """Average the per-tree predictions for one raw data row.

    Each tree sees only its own feature subset, so the row is projected
    onto that subset before classification.
    """
    total = 0.0
    for subset, tree in zip(SeletedFeaturesSet, TreesSet):
        projected = [data[j] for j in subset]
        total += DT.classify(tree, projected)
    return total / len(SeletedFeaturesSet)
1 0
- Bagging、DT和RF
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- 决策树和基于决策树的集成方法(DT,RF,GBDT,XGB)复习总结
- GBDT和RF
- dl dt 和 dd
- rf
- RF
- RF
- DT
- Rf GCF和一致性测试
- rf characters 速率和带宽
- RF/IF 和 ZigBee®解决方案
- Analog和RF领域经典书籍
- Centos7+RF 配置和使用
- .net MVC中Jquery实现AJAX详解
- Linux下图形函数库2---curses.h
- Android百度地图开发(二)-定位
- recordset对象类型不匹配题
- Linux学习笔记(2)
- Bagging、DT和RF
- Spring的AOP使用实例
- CentOS6.5手动升级gcc4.8.2
- 查找OGG trail file中是否存在相关记录的命令
- EJB到底是什么,真的那么神秘吗??
- LQ BASIC-10 十进制转十六进制
- PyTorch开源
- JS学习笔记1——不要使用JavaScript内置的parseInt()和Number()函数,利用map和reduce操作实现一个string2int()函数
- Tomcat的默认访问路径