Hands-On: Training Models with XGBoost, scikit-learn, and pandas

import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, load_boston

# Build the models with XGBoost, evaluate them with sklearn.
# Classification on the digits dataset, scored with a confusion matrix.
digits = load_digits()
y = digits['target']
X = digits['data']
print(X.shape)   # (1797, 64)
print(y.shape)   # (1797,)

# K-fold splitter
kf = KFold(n_splits=2, shuffle=True, random_state=1234)
for train_index, test_index in kf.split(X):
    xgboost_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    # predictions
    pred = xgboost_model.predict(X[test_index])
    # ground truth
    ground_truth = y[test_index]
    print(confusion_matrix(ground_truth, pred))

Output of the two folds (one confusion matrix per fold):

[[78  0  0  0  0  0  0  0  1  0]
 [ 0 92  1  0  0  0  0  0  0  0]
 [ 0  2 82  0  0  0  2  0  0  0]
 [ 0  1  1 88  0  0  0  1  0  3]
 [ 2  0  0  0 99  0  2  3  1  0]
 [ 0  0  0  1  0 95  2  0  0  4]
 [ 0  2  0  0  0  0 84  0  2  0]
 [ 0  0  0  0  0  0  0 86  0  2]
 [ 0  6  0  2  0  0  0  0 73  1]
 [ 0  1  0  0  1  0  0  5  2 71]]
[[98  0  0  0  0  0  0  1  0  0]
 [ 0 84  2  1  0  0  1  0  0  1]
 [ 1  0 88  0  0  0  0  1  1  0]
 [ 0  0  1 86  0  1  0  0  0  1]
 [ 0  0  0  0 74  0  0  0  0  0]
 [ 1  0  0  0  1 73  0  0  1  4]
 [ 0  0  0  0  1  1 91  0  0  0]
 [ 0  0  0  1  0  0  0 89  1  0]
 [ 1  1  0  0  1  1  0  0 87  1]
 [ 0  2  0  1  0  1  0  0  2 94]]

# Another multi-class example: iris
iris = load_iris()
y_iris = iris['target']
X_iris = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=1234)
for train_index, test_index in kf.split(X_iris):
    xgboost_model = xgb.XGBClassifier().fit(X_iris[train_index], y_iris[train_index])
    # predictions
    pred = xgboost_model.predict(X_iris[test_index])
    # ground truth
    ground_truth = y_iris[test_index]
    print(confusion_matrix(ground_truth, pred))

# Regression problem
boston = load_boston()
y_boston = boston['target']
X_boston = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=1234)
for train_index, test_index in kf.split(X_boston):
    xgboost_model = xgb.XGBRegressor().fit(X_boston[train_index], y_boston[train_index])
    # predictions
    pred = xgboost_model.predict(X_boston[test_index])
    # ground truth
    ground_truth = y_boston[test_index]
    print(mean_squared_error(ground_truth, pred))

Hyperparameter tuning (parameter selection):

boston = load_boston()
y_boston = boston['target']
X_boston = boston['data']
xgb_model = xgb.XGBRegressor()
# parameter grid
param_dict = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}
rgs = GridSearchCV(xgb_model, param_dict)
rgs.fit(X_boston, y_boston)
print(rgs.best_score_)
print(rgs.best_params_)
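The title mentions pandas, but the walkthrough above only uses NumPy arrays. Here is a minimal sketch of the same digits K-fold loop with the features held in a pandas DataFrame (the pixel_i column names and variable names are made up for illustration): XGBClassifier accepts DataFrames directly, and .iloc maps the integer indices that KFold yields back to rows.

import pandas as pd
import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

digits = load_digits()
# wrap the feature matrix and labels in pandas containers
X_df = pd.DataFrame(digits['data'],
                    columns=[f'pixel_{i}' for i in range(digits['data'].shape[1])])
y_s = pd.Series(digits['target'], name='label')

kf = KFold(n_splits=2, shuffle=True, random_state=1234)
for train_index, test_index in kf.split(X_df):
    # .iloc selects rows by position, matching the integer indices from KFold
    model = xgb.XGBClassifier().fit(X_df.iloc[train_index], y_s.iloc[train_index])
    pred = model.predict(X_df.iloc[test_index])
    print(confusion_matrix(y_s.iloc[test_index], pred))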

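Note that load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the regression and grid-search snippets above will not run on a recent install. Below is a hedged sketch of the same XGBRegressor + KFold + mean_squared_error loop using fetch_california_housing as a stand-in regression dataset (the variable names here are mine):

import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

# downloads and caches the dataset on first use
housing = fetch_california_housing()
X_h, y_h = housing['data'], housing['target']

kf = KFold(n_splits=2, shuffle=True, random_state=1234)
for train_index, test_index in kf.split(X_h):
    model = xgb.XGBRegressor().fit(X_h[train_index], y_h[train_index])
    pred = model.predict(X_h[test_index])
    print(mean_squared_error(y_h[test_index], pred))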

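The grid search above relies on GridSearchCV's defaults: 5-fold cross-validation and the estimator's own score method (R² for a regressor). The sketch below spells both out so the search is scored with the same MSE metric used earlier; the scoring string, fold count, and parameter grid are choices for illustration, not requirements.

import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV, KFold

housing = fetch_california_housing()
xgb_model = xgb.XGBRegressor()
param_dict = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}

search = GridSearchCV(
    xgb_model,
    param_dict,
    scoring='neg_mean_squared_error',  # negated MSE, so higher is still better
    cv=KFold(n_splits=3, shuffle=True, random_state=1234),
)
search.fit(housing['data'], housing['target'])
print(search.best_score_)   # best (negative) MSE found across the grid
print(search.best_params_)  # the winning max_depth / n_estimators combination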