决策树算法实例(基于ID3)

来源:互联网 发布:直播平台用户数据分析 编辑:程序博客网 时间:2024/06/06 02:07
本文演示基于ID3(以信息熵为划分标准)的决策树算法,使用sklearn库实现;生成的.dot文件可以用graphviz转换为pdf查看。

案例中用到的模拟数据如下:

############################################################################

RID,age,income,student,credit_rating,class_buys_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middle_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,senior,low,yes,excellent,no
7,middle_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
11,youth,medium,yes,excellent,yes
12,middle_aged,medium,no,excellent,yes
13,middle_aged,high,yes,fair,yes
14,senior,medium,no,excellent,no

############################################################################

# Decision-tree demo: read a CSV of categorical records, one-hot encode the
# feature columns, fit an entropy-based DecisionTreeClassifier, export the
# tree in Graphviz .dot format, and predict the class of one modified sample.
#
# Written for Python 3. The original was Python 2 (``reader.next()``, the
# ``print`` statement, binary-mode csv) and targeted an old scikit-learn.

import csv
# ``sklearn.externals.six`` was removed from scikit-learn; the standard
# library provides the same class.  The name is unused below and is only
# kept so that it stays importable from this script.
from io import StringIO  # noqa: F401

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

# The csv module on Python 3 wants a text-mode file opened with newline=''.
# The ``with`` block guarantees the handle is closed once the rows are read.
with open(r'E:\myAI\AllElectronics.csv', 'r', newline='') as allelectionicsData:
    reader = csv.reader(allelectionicsData)
    headers = next(reader)
    featureList = []
    lableList = []
    print(headers)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[-1].
            continue
        # The last column is the class label.
        lableList.append(row[-1])
        # The first column (record id) and the last column (label) are not
        # features; everything in between becomes {column name: value}.
        featureList.append(dict(zip(headers[1:-1], row[1:-1])))
print(featureList)

# One-hot encode the categorical feature dictionaries.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX: " + str(dummyX))

# ``get_feature_names`` was replaced by ``get_feature_names_out`` in newer
# scikit-learn releases; use whichever this installation provides.
if hasattr(vec, "get_feature_names_out"):
    featureNames = [str(name) for name in vec.get_feature_names_out()]
else:
    featureNames = vec.get_feature_names()
print(featureNames)
print("Lablelist: " + str(lableList))

# Encode the class labels as a 0/1 column.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(lableList)
print("dummyY: " + str(dummyY))

# criterion='entropy' selects information-gain splitting.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf: " + str(clf))

# Write the fitted tree as a Graphviz .dot file.  The original rebound the
# open file handle ``f`` to the return value of export_graphviz; the call's
# result is not needed when ``out_file`` is given.
with open("allelectionicsData.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))

# Copy before editing: ``dummyX[0, :]`` is a view, so assigning through an
# alias (as the original did) silently rewrites the training matrix.
newoneRow = oneRowX.copy()
# Flip two one-hot columns of the first sample.
# NOTE(review): with the sample data and DictVectorizer's default sorted
# feature order, index 0 should be age=middle_aged and index 2 age=youth --
# confirm against the printed feature names.
newoneRow[0] = 1
newoneRow[2] = 0
print("newoneRow : " + str(newoneRow))

# predict() expects a 2-D array of shape (n_samples, n_features).
predictedY = clf.predict(newoneRow.reshape(1, -1))

print("predictedY: " + str(predictedY))

0 0
原创粉丝点击