决策树(ID3)python
来源:互联网 发布:飞行器动力知乎 编辑:程序博客网 时间:2024/05/17 11:35
# coding=utf-8
# ID3 decision-tree classifier: repeatedly splits on the feature with the
# highest information gain until labels are pure or features run out.
from numpy import *
from math import log


class ID3(object):
    """ID3 decision tree over categorical features stored in a numpy matrix.

    Note: numpy.matrix coerces every entry to str, so labels are the
    strings '0' / '1' internally; predict() converts them back to float.
    """

    def createDataSet(self):
        """Load the hard-coded toy training set.

        Columns 0..2 are categorical features; the last column is the
        class label.  Sets self.dataMat, self.labelMat and resets the
        (not yet trained) tree self.keys.
        """
        dataSet = matrix([['s', 's', 'no', 0],
                          ['s', 'l', 'yes', 1],
                          ['l', 'm', 'yes', 1],
                          ['m', 'm', 'yes', 1],
                          ['l', 'm', 'yes', 1],
                          ['m', 'l', 'no', 1],
                          ['m', 's', 'no', 0],
                          ['l', 'm', 'no', 1],
                          ['m', 's', 'no', 1],
                          ['s', 's', 'yes', 0]])
        self.dataMat = dataSet[:, 0:-1]   # feature columns
        self.labelMat = dataSet[:, -1]    # label column
        self.keys = {}                    # trained tree: nested {axis: {value: subtree-or-leaf}}

    def entropy(self, data):
        """Shannon entropy (in bits) of the values in column 0 of *data*."""
        dataNum, featureNum = shape(data)
        counts = {}
        for i in arange(dataNum):
            value = data[i, 0]
            counts[value] = counts.get(value, 0) + 1
        result = 0.0
        for count in counts.values():
            prob = float(count) / float(dataNum)
            # BUGFIX: the original called math.log(), but only the bare
            # `log` name is imported (`from math import log`), so every
            # call raised NameError.
            result -= prob * log(prob, 2)
        return result

    def gain(self, data, label):
        """Information gain of splitting *label* on the single feature
        column held in *data* (an m x 1 matrix)."""
        dataNum, featureNum = shape(data)
        partitions = {}
        result = self.entropy(label)
        # Group the labels by the feature value of each row.
        for i in arange(dataNum):
            value = data[i, 0]
            partitions.setdefault(value, []).append(label[i, 0])
        # Subtract the weighted entropy of each partition.
        # (The original shadowed the `data` parameter in this loop.)
        for value in partitions:
            sub = mat(partitions[value]).T
            result -= self.entropy(sub) * sub.shape[0] / dataNum
        return result

    def split_DataSet(self, data, label, axis, value):
        """Select the rows whose feature *axis* equals *value* and drop
        that column.  Returns (sub_data, sub_label)."""
        dataset = hstack((data, label))
        m, n = shape(dataset)
        split_data = mat(zeros((0, n - 1)))
        for row in dataset:
            if row[0, axis] == value:
                vec = hstack((row[0, 0:axis], row[0, axis + 1:]))
                split_data = vstack((split_data, vec))
        return split_data[:, 0:-1], split_data[:, -1]

    def Best_Feature(self, data, label):
        """Return (axis, values): the index of the feature with the
        largest information gain and the set of values it takes."""
        gains = []
        for i in arange(data.shape[1]):
            gains.append([self.gain(data[:, i], label), i])
        gains.sort(reverse=True)          # best gain first
        axis = gains[0][1]
        values = set([row[0, 0] for row in data[:, axis]])
        return axis, values

    def stop_Condtion(self, data, label):
        """Decide whether to stop growing this branch.

        Stops when only one feature column remains or all labels agree.
        Returns (stop, majority_label).
        """
        dataNum, featureNum = shape(data)
        label_classify = set([row[0, 0] for row in label])
        majority = self.more_label(label)
        if featureNum == 1:
            return True, majority
        if len(label_classify) == 1:
            return True, majority
        return False, majority

    def more_label(self, labels):
        """Most frequent label value (ties broken by first-seen order)."""
        counts = {}
        for row in labels:
            value = row[0, 0]
            counts[value] = counts.get(value, 0) + 1
        best_key = ''
        best_count = 0
        for key, count in counts.items():
            if best_count < count:
                best_count = count
                best_key = key
        return best_key

    def build_Tree(self, data=None, label=None):
        """Recursively build the tree; returns either a leaf label or a
        nested dict {axis: {feature_value: subtree}}."""
        if data is None:
            data = self.dataMat
        if label is None:
            label = self.labelMat
        stop, majority = self.stop_Condtion(data, label)
        if stop:
            return majority
        axis, values = self.Best_Feature(data, label)
        branches = {}
        for value in values:
            split_data, split_label = self.split_DataSet(data, label, axis, value)
            branches[value] = self.build_Tree(split_data, split_label)
        return {axis: branches}

    def train(self):
        """Load the data set and build the decision tree."""
        self.createDataSet()
        self.keys = self.build_Tree()

    def predict_One(self, data, keys=None):
        """Classify one sample row (1 x n matrix) by walking the tree."""
        if keys is None:
            keys = self.keys
        if not isinstance(keys, dict):
            return keys               # reached a leaf: the predicted label
        for axis, branches in keys.items():
            for value, subtree in branches.items():
                if data[0, axis] == value:
                    # BUGFIX: subtree axes were computed on data with this
                    # column removed (split_DataSet drops it during
                    # training), so drop it here as well before descending.
                    # The original passed the full row down, mis-reading
                    # features at every level below the root.
                    reduced = hstack((data[0, 0:axis], data[0, axis + 1:]))
                    return self.predict_One(reduced, subtree)

    def predict(self, data):
        """Classify every row of *data*; returns an m x 1 float matrix."""
        data_num = shape(data)[0]
        result = mat(zeros((data_num, 1)))
        for i in arange(data_num):
            result[i, 0] = self.predict_One(data[i, :])
        return result


if __name__ == '__main__':
    tree = ID3()
    tree.train()
    dataSet = matrix([['s', 's', 'no', 0],
                      ['s', 'l', 'yes', 1],
                      ['l', 'm', 'yes', 1],
                      ['m', 'm', 'yes', 1],
                      ['l', 'm', 'yes', 1],
                      ['m', 'l', 'no', 1],
                      ['m', 's', 'no', 0],
                      ['l', 'm', 'no', 1],
                      ['m', 's', 'no', 1],
                      ['s', 's', 'yes', 0]])
    data = dataSet[:, 0:-1]
    label = dataSet[:, -1]
    target = tree.predict(data)
    print(target)
测试集:
[['0' '1' '1' '1' '1' '1' '0' '1' '1' '0']]
测试结果:
[[ 0. 1. 1. 1. 1. 1. 1. 1. 1. 0.]]
0 0
- 决策树(ID3)python
- 决策树ID3代码(Python)
- python实现决策树(ID3算法)
- 决策树(ID3,C4.5)Python实现
- Python 决策树算法(ID3 & C4.5)
- ID3决策树算法(python实现)
- 决策树(ID3算法)Python实现
- Python实现决策树(ID3、C4.5)
- id3决策树Python版
- Python-决策树ID3实践
- 决策树(一) ID3
- (决策树)ID3算法
- 决策树(1)ID3
- 决策树(一)--ID3
- python实现决策树ID3算法
- Python实现ID3算法决策树
- Python实现决策树算法ID3
- 决策树ID3的Python实现
- 微信小程序手机归属地查询功能实现
- 30分钟学会用scikit-learn的基本回归方法(线性、决策树、SVM、KNN)和集成方法(随机森林,Adaboost和GBRT)
- MIT6.824 分布式系统 lab1
- 快速排序算法
- JavaScript 学习笔记 1
- 决策树(ID3)python
- Leetcode 43. Multiply Strings
- Easy 88题 Merge Sorted Array
- 方程求值
- Leetcode 200. Number of Islands & 305. Number of Islands II
- ArrayList容器---去除重复元素
- 搞笑
- GCC 6.2, significantly improved performance!
- Easy 219题 Contains Duplicate II