决策树实现
来源:互联网 发布:淘宝网如何收藏宝贝 编辑:程序博客网 时间:2024/06/06 12:42
from math import log
import operator


def calsShannonEnt(dataSet):
    """Return the Shannon entropy of dataSet.

    Each example's class label is its last element; entropy is computed
    over the frequency distribution of those labels.
    """
    numEntries = len(dataSet)
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = count / float(numEntries)
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt


def createDataSet():
    """Return a toy dataset and its feature-name list."""
    dataSet = [[1, 1, 'yes'],
               [1, 1, 'yes'],
               [1, 0, 'no'],
               [0, 1, 'no'],
               [0, 1, 'no']]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels


def splitDataSet(dataSet, axis, value):
    """Return the examples whose feature `axis` equals `value`,
    with that feature column removed from each example."""
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            # concatenation builds a new list, so dataSet is never mutated
            retDataSet.append(featVec[:axis] + featVec[axis + 1:])
    return retDataSet


def chooseBestFeatureToSplit(dataSet):
    """Return the index of the feature with the highest information gain,
    or -1 if no split improves on the base entropy."""
    numFeatures = len(dataSet[0]) - 1  # last column is the class label
    baseEntropy = calsShannonEnt(dataSet)
    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataSet)
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calsShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature


def majorityCnt(classList):
    """Return the most frequent class label in classList (majority vote)."""
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def createTree(dataSet, labels):
    """Recursively build an ID3 decision tree as nested dicts.

    Returns either a class label (leaf) or {featureName: {value: subtree}}.

    BUG FIX: the original executed `del labels[bestFeat]`, mutating the
    caller's labels list as a side effect; we now work on a copy.
    """
    classList = [example[-1] for example in dataSet]
    if classList.count(classList[0]) == len(classList):
        return classList[0]            # all examples share one class
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)  # no features left: majority vote
    labels = labels[:]                 # copy: never mutate the caller's list
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    del labels[bestFeat]
    featValues = set(example[bestFeat] for example in dataSet)
    for value in featValues:
        # each child gets its own copy of the remaining labels
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), labels[:])
    return myTree


def classify(inputTree, featLabels, testVec):
    """Classify testVec by walking inputTree.

    featLabels maps feature names (tree keys) to indices in testVec.
    Raises KeyError if testVec holds a feature value the tree has no
    branch for (the original raised UnboundLocalError in that case).
    """
    firstStr = list(inputTree.keys())[0]
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    key = testVec[featIndex]
    if key not in secondDict:
        raise KeyError('no branch for feature %r value %r' % (firstStr, key))
    valueOfFeat = secondDict[key]
    if isinstance(valueOfFeat, dict):
        return classify(valueOfFeat, featLabels, testVec)
    return valueOfFeat


def storeTree(inputTree, filename):
    """Serialize inputTree to filename with pickle.

    Uses `with` so the file is closed even if dump raises.
    """
    import pickle
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load a pickled tree from filename.

    Uses `with` so the handle is closed (the original leaked it).
    NOTE(security): pickle.load on an untrusted file can execute
    arbitrary code — only load trees you stored yourself.
    """
    import pickle
    with open(filename, 'rb') as fr:
        return pickle.load(fr)
阅读全文
0 0
- 决策树实现
- 决策树实现
- 决策树 实现
- # 详解决策树、python实现决策树
- ID3 算法实现决策树
- 决策树 id3 C++实现
- 决策树算法的实现
- 决策树及实现
- 决策树的实现
- 决策树代码实现
- 决策树代码实现
- Python实现决策树算法
- 决策树--Python实现
- sklearn中决策树实现
- sklearn中决策树实现
- 决策树C++实现
- 决策树及其python实现
- 决策树原理-python实现
- 线程的控制
- 打造Android万能上拉下拉刷新框架--XRefreshView(三)
- Mybatis学习(4):Mybatis及PageHelper插件和easyUI实现分页
- Vim插件之vimwiki
- 人工智能实现简单的五子棋程序
- 决策树实现
- Reverse Singly LinkedList的方法探讨
- 在ssh中用struts2标签读取list数组集合
- C++之MYSQL的简介
- Angular2的一些快捷查看
- DeepCoder初步了解
- 使用 Matplotlib 绘图
- MFC webBrowser 禁止加载图片、背景声音、ActiveX 等
- 关系数据库完整性