决策树

来源:互联网 发布:经济学是什么 知乎 编辑:程序博客网 时间:2024/05/21 20:25
# -*- coding: utf-8 -*-
"""Train an entropy-based decision tree on the AllElectronics CSV.

Created on Sun Oct 22 22:01:49 2017

@author: wilcohuang

Steps performed by this script:

1. Read the CSV into a list of per-row feature dicts and a list of labels.
2. Vectorize the feature dicts with ``DictVectorizer`` and binarize the
   labels with ``LabelBinarizer``.
3. Fit a ``DecisionTreeClassifier`` and export it to a Graphviz ``.dot`` file.
4. Predict the class of a modified copy of the first training row.

Written for Python 3.
"""
import csv
# StringIO used to be imported from sklearn.externals.six, which no longer
# exists in scikit-learn; the standard-library class keeps the name available.
from io import StringIO  # noqa: F401

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

# Read in the csv file and put the features into a list of dicts and the class
# labels into a list.
# Python 3 hands str paths to Windows as-is, so a path containing non-ASCII
# characters needs no manual utf8 -> gbk re-encoding (str.decode() does not
# exist on Python 3). Either 'D:/...' or a raw string r'D:\...' works.
csvPath = r'C:\Users\wilcohuang\Desktop\\机器学习笔记\AllElectronics.csv'

featureList = []
labelList = []
# newline='' is the mode the csv module asks for when reading text files.
# NOTE(review): the CSV's encoding is not visible from here, so the locale
# default is used -- pass encoding=... explicitly if the file is not ASCII.
with open(csvPath, newline='') as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    headers = next(reader)
    print(headers)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; they carry no sample.
            continue
        # The last column is the class label; column 0 is skipped
        # (presumably a row id -- confirm against the data file).
        labelList.append(row[-1])
        featureList.append(
            {headers[i]: row[i] for i in range(1, len(row) - 1)}
        )
print(featureList)

# Vectorize features
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
# feature_names_ is the fitted list of column names; it is available across
# scikit-learn versions, unlike get_feature_names(), which has been removed.
featureNames = vec.feature_names_
print("dummyX: " + str(dummyX))
print(featureNames)
print("labelList: " + str(labelList))

# Vectorize class labels
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY: " + str(dummyY))

# Using decision tree for classification; criterion='entropy' is passed
# explicitly instead of relying on the estimator's default criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf: " + str(clf))

# Visualize model. The return value is not kept: the output goes to the file
# handle, and rebinding ``f`` inside its own ``with`` block was misleading.
with open("allElectronicInformationGainOri.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)
# To render the tree, run on the command line:
# dot -Tpdf allElectronicInformationGainOri.dot -o allElectronicInformationGainOri.pdf

oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Work on a copy: dummyX[0, :] is a view, so editing it in place would also
# overwrite the first row of the training matrix (and oneRowX itself).
newRowX = oneRowX.copy()
newRowX[0] = 1
newRowX[2] = 0
print("newRowX: " + str(newRowX))

# predict() takes a 2-D collection of samples, hence the one-element list.
paramRow = [newRowX]
print("paramRow: " + str(paramRow))

predictedY = clf.predict(paramRow)
print("predictedY: " + str(predictedY))
原创粉丝点击