Kaggle的Digit Recognizer题目实现

来源:互联网 发布:天刀捏脸数据女 萌妹子 编辑:程序博客网 时间:2024/06/05 20:44

        机器学习已经学习了一段时间,对一些常用的算法有了初步的了解。接下来应该找个项目或比赛练习一下,看看机器学习在实际中是如何应用的。Kaggle是一个非常不错的机器学习和数据挖掘比赛网站,网站提供数据,可以拿来练习算法。下面是Kaggle入门(101)系列里的第一道题:Digit Recognizer。本文用Python实现,使用了KNN、SVM和RF(随机森林)三种算法。今天先贴上代码,过几天再把相应的算法原理补上,方便深入理解。这段代码参照了其他大神写的代码,并进行了一定的修改和优化。数据可以从如下网址获取:https://www.kaggle.com/c/digit-recognizer/data

import csv
import operator

import numpy as np
import scipy  # kept from the original script; not referenced below


def read_data(file, header=True, test=False, rows=0):
    """Load a comma-separated file of integer values.

    Args:
        file: Path of the CSV file to read.
        header: When true, the first physical line is treated as a header
            and skipped.
        test: When false, the first column of every row is taken as the
            integer label and removed from the feature vector. When true,
            the whole row is the feature vector and no labels are produced.
        rows: Maximum number of physical lines to consume, counting the
            header line if there is one. ``0`` (or a negative value) means
            "read everything".

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a list of one-dimensional
        ``int64`` arrays (one per row); ``labels`` is a list of ints and is
        empty when ``test`` is true.
    """
    data = []
    labels = []
    # The context manager guarantees the handle is closed even on error.
    with open(file, 'r') as handle:
        for index, line in enumerate(csv.reader(handle, delimiter=','), 1):
            # Logical ``and`` is required here: bitwise ``&`` binds tighter
            # than the comparisons and silently changes the meaning.
            if rows > 0 and index > rows:
                break
            if header and index == 1:
                continue
            if not line:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            if not test:
                labels.append(int(line[0]))
                line = line[1:]
            data.append(np.array([int(value) for value in line],
                                 dtype=np.int64))
    return data, labels


def _fit_and_score(model, train, labels, test):
    """Fit ``model`` and return (predictions, highest class probability)."""
    model.fit(train, labels)
    predict = model.predict(test)
    pre_pro = model.predict_proba(test)
    # For each sample keep only the probability of its most likely class.
    max_pre_pro = pre_pro.max(axis=1)
    return predict, max_pre_pro


def predictKNN(train, labels, test):
    """Classify ``test`` with a default ``KNeighborsClassifier``.

    Returns:
        ``(predict, max_pre_pro)``: the predicted label of every test sample
        and the largest class probability reported for that sample.
    """
    # scikit-learn is imported lazily so the data-loading and voting helpers
    # in this file stay importable without it.
    from sklearn.neighbors import KNeighborsClassifier

    print('KNN starts...')
    predict, max_pre_pro = _fit_and_score(
        KNeighborsClassifier(), train, labels, test)
    print('KNN ends...')
    return predict, max_pre_pro


def predicSVC(train, labels, test):
    """Classify ``test`` with an ``SVC`` created with ``probability=True``.

    Returns:
        ``(predict, max_pre_pro)``: the predicted label of every test sample
        and the largest class probability reported for that sample.
    """
    from sklearn.svm import SVC

    print('SVC starts...')
    predict, max_pre_pro = _fit_and_score(
        SVC(probability=True), train, labels, test)
    print('SVC ends...')
    return predict, max_pre_pro


def predicRF(train, labels, test, label):
    """Classify ``test`` with a 200-tree ``RandomForestClassifier``.

    ``label`` is accepted for call compatibility but is not used.

    Returns:
        ``(predict, max_pre_pro)``: the predicted label of every test sample
        and the largest class probability reported for that sample.
    """
    from sklearn.ensemble import RandomForestClassifier

    print('RF starts...')
    predict, max_pre_pro = _fit_and_score(
        RandomForestClassifier(n_estimators=200, n_jobs=2),
        train, labels, test)
    print('RF ends...')
    return predict, max_pre_pro


class PredicScore(object):
    """Pair a predicted label with the score its classifier gave it."""

    # Class-level fallbacks, as in the original script; every instance
    # overrides both in __init__.
    predict = -1
    score = 0

    def __init__(self, predict, score):
        self.predict = predict
        self.score = score


def combine_predictions(candidates):
    """Pick, per sample, the prediction whose classifier scored highest.

    Args:
        candidates: Sequence of ``(predictions, scores)`` pairs, one pair per
            classifier, where both members are equally long sequences.

    Returns:
        A list with one winning prediction per sample. When several
        classifiers tie on score, the one listed first in ``candidates``
        wins (``max`` returns the first maximal element).
    """
    by_score = operator.attrgetter('score')
    per_classifier = [zip(predictions, scores)
                      for predictions, scores in candidates]
    result = []
    for sample in zip(*per_classifier):
        scored = [PredicScore(predict, score) for predict, score in sample]
        result.append(max(scored, key=by_score).predict)
    return result


def main():
    """Train KNN, SVC and RF on train.csv and write submission.csv."""
    print('test begins...')
    train, labels = read_data('train.csv', rows=100)
    test, label = read_data('test.csv', test=True, rows=100)

    predict_RF, max_pre_pro_RF = predicRF(train, labels, test, label)
    predict_KNN, max_pre_pro_KNN = predictKNN(train, labels, test)
    predict_SVC, max_pre_pro_SVC = predicSVC(train, labels, test)

    # Candidate order (KNN, SVC, RF) decides ties, matching the order in
    # which the original loop appended to its score array.
    result = combine_predictions([
        (predict_KNN, max_pre_pro_KNN),
        (predict_SVC, max_pre_pro_SVC),
        (predict_RF, max_pre_pro_RF),
    ])

    np.savetxt('submission.csv', result, fmt='%i', delimiter=',')
    print('done')


if __name__ == '__main__':
    main()



0 0