sklearn学习代码

来源:互联网 发布:新东方网络学校 编辑:程序博客网 时间:2024/05/18 03:08
# Train a classifier on data/train.csv, predict the "Hazard" column for
# data/test.csv, and write the (Id, Hazard) predictions to data/out.csv.
#
# Every training column from the third one onward is used as a feature.
# NOTE(review): this presumably means the first two training columns are
# "Id" and "Hazard" -- confirm against the actual CSV header.

import types  # NOTE: no longer used by this script; kept in case other code relies on it

import pandas as pd
from numpy import *  # NOTE: wildcard import; may shadow builtins (e.g. sum, any, all)
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Candidate models, keyed by name.  Each value is a zero-argument callable
# that builds a fresh, unfitted estimator with the settings tried so far.
MODEL_FACTORIES = {
    "random_forest": lambda: RandomForestClassifier(n_jobs=2),
    "svm": lambda: svm.SVC(kernel="linear"),
    "logistic_regression": lambda: LogisticRegression(),
    "adaboost": lambda: AdaBoostClassifier(n_estimators=40, learning_rate=0.001),
    "gradient_boosting": lambda: GradientBoostingClassifier(n_estimators=100),
}

# Which entry of MODEL_FACTORIES to train.  Change this to switch models.
MODEL_NAME = "gradient_boosting"


def _encode_categoricals(train_df, test_df, columns):
    """Replace non-numeric feature columns with integer codes, in place.

    The codes for a column are computed over the training and test values
    together, so a given category maps to the same integer in both frames.
    Factorizing each frame separately would number categories by their
    order of first appearance in that frame, which generally differs
    between train and test and silently corrupts the predictions.

    Missing values are encoded as -1 (the ``pd.factorize`` convention).

    Args:
        train_df: Training frame; modified in place.
        test_df: Test frame; modified in place.
        columns: Names of the feature columns to inspect.  Each must be
            present in both frames.
    """
    n_train = len(train_df)
    for col in columns:
        # Decide by column dtype rather than by peeking at the first row:
        # this works on Python 3 (types.StringType is Python 2 only), on
        # empty frames, and when the first value happens to be missing.
        if pd.api.types.is_numeric_dtype(train_df[col]) and pd.api.types.is_numeric_dtype(
            test_df[col]
        ):
            continue
        combined = pd.concat([train_df[col], test_df[col]], ignore_index=True)
        codes, _ = pd.factorize(combined)
        train_df[col] = codes[:n_train]
        test_df[col] = codes[n_train:]


train = pd.read_csv("data/train.csv")
test = pd.read_csv("data/test.csv")

features = train.columns[2:]
_encode_categoricals(train, test, features)

clf = MODEL_FACTORIES[MODEL_NAME]()
clf.fit(train[features], train["Hazard"])
result = clf.predict(test[features])

test["Hazard"] = result
final = test.loc[:, ["Id", "Hazard"]]
final.to_csv("data/out.csv", index=False)
print(final)

0 0
原创粉丝点击