决策树空气分析

来源:互联网 发布:淘宝月销售额层级划分 编辑:程序博客网 时间:2024/04/25 13:26

数据处理

# -*- coding: utf-8 -*-
"""Convert Roman-numeral air-quality grades in the raw spreadsheet to integers.

Created on Mon Apr 24 16:29:07 2017

@author: Administrator
"""
import pandas as pd

inputfile = 'F:/data/chapter9/demo/data/consider.xls'  # raw data file
outputfile = 'F:/data/chapter9/demo/data/consider_o.xls'  # encoded data file
# NOTE(review): pandas >= 2.0 no longer ships a writer for the legacy ``.xls``
# format; the path is left untouched, so confirm the installed pandas version
# can still write it.

GRADE_COLUMN = '空气等级'

# Roman-numeral grade label -> integer code.
GRADE_CODES = {'I': 1, 'II': 2, 'III': 3, 'IV': 4, 'V': 5, 'VI': 6, 'VII': 7}


def encode_air_grade(frame, column=GRADE_COLUMN):
    """Return a copy of *frame* with Roman-numeral grades replaced by integers.

    Args:
        frame: DataFrame holding the grade column.
        column: Name of the column whose values are converted.

    Returns:
        A new DataFrame; *frame* itself is not modified. Values that are not
        one of the keys of ``GRADE_CODES`` are left unchanged.
    """
    encoded = frame.copy()
    # A single whole-column assignment replaces the original chained indexing
    # (``data[col][mask] = n``), which writes to a temporary object and is a
    # no-op under pandas copy-on-write.
    encoded[column] = frame[column].map(
        lambda grade: GRADE_CODES.get(grade, grade)
    )
    return encoded


def main():
    """Read the raw spreadsheet, encode the grade column and save the result."""
    data = pd.read_excel(inputfile)
    encode_air_grade(data).to_excel(outputfile)


if __name__ == '__main__':
    main()

模型训练

# -*- coding: utf-8 -*-
"""Train a decision-tree classifier on the encoded air-quality data.

Reads the encoded spreadsheet, shuffles the rows, fits an entropy-based
decision tree on the first 80% and writes the confusion matrices of the
training and test splits to Excel files.

Created on Mon Apr 24 16:58:23 2017

@author: Administrator
"""
import numpy as np
import pandas as pd

inputfile = 'F:/data/chapter9/demo/data/consider_o.xls'  # data file
outputfile1 = 'F:/data/chapter9/demo/tmp/train_consider.xls'  # training confusion matrix
outputfile2 = 'F:/data/chapter9/demo/tmp/test_consider.xls'  # test confusion matrix

TRAIN_FRACTION = 0.8

# Grade codes used as row/column labels of the saved confusion matrices.
GRADE_LABELS = list(range(1, 8))


def split_train_test(data, train_fraction=TRAIN_FRACTION):
    """Split the rows of a 2-D array into a leading and a trailing part.

    Args:
        data: 2-D array, one sample per row.
        train_fraction: Share of rows (from the top) that go to training.

    Returns:
        ``(train_rows, test_rows)``: the first ``int(train_fraction * n)``
        rows and the remaining rows.
    """
    cut = int(train_fraction * len(data))
    return data[:cut, :], data[cut:, :]


def features_and_labels(rows):
    """Return ``(x, y)`` taken from columns 0-4 and column 6 of *rows*.

    The features are multiplied by 30 exactly as in the original script.
    NOTE(review): the column positions depend on the spreadsheet layout,
    which is not visible here; confirm that columns 0-4 are the intended
    features and column 6 is the grade.
    """
    return rows[:, 0:5] * 30, rows[:, 6].astype(int)


def main():
    """Run the full train / evaluate / save pipeline."""
    # Imported here so the helpers above stay importable without scikit-learn.
    from sklearn import metrics
    from sklearn.tree import DecisionTreeClassifier as DTC

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``to_numpy`` is its
    # replacement. ``copy=True`` guarantees a writable array for the in-place
    # shuffle below.
    data = pd.read_excel(inputfile).to_numpy(copy=True)
    np.random.shuffle(data)  # shuffle the rows in place

    data_train, data_test = split_train_test(data)  # first 80% / last 20%
    x_train, y_train = features_and_labels(data_train)
    x_test, y_test = features_and_labels(data_test)

    model = DTC(criterion='entropy')
    model.fit(x_train, y_train)
    print(model.score(x_test, y_test))

    # Passing ``labels`` pins both matrices to 7x7 even when a split happens
    # to contain no sample of some grade; without it the DataFrame
    # construction below fails on a shape mismatch.
    cm_train = metrics.confusion_matrix(
        y_train, model.predict(x_train), labels=GRADE_LABELS
    )
    cm_test = metrics.confusion_matrix(
        y_test, model.predict(x_test), labels=GRADE_LABELS
    )

    # Save the results.
    pd.DataFrame(cm_train, index=GRADE_LABELS, columns=GRADE_LABELS).to_excel(outputfile1)
    pd.DataFrame(cm_test, index=GRADE_LABELS, columns=GRADE_LABELS).to_excel(outputfile2)
    print(model.score(x_train, y_train))


if __name__ == '__main__':
    main()
0 0