【数据挖掘 xgboost】特征的重要程度分析

来源:互联网 发布:防网络诈骗图片 编辑:程序博客网 时间:2024/06/05 08:17

代码片段

"""Train an XGBoost booster and print its features ranked by importance score.

The script expects ``X_train`` (feature matrix) and ``y_train`` (labels) to
already exist; the commented-out preprocessing below shows one way to build
them with scikit-learn from a full dataset ``X`` / ``y``.
"""
import operator

import pandas as pd
import xgboost as xgb

# Optional preprocessing (left disabled, as in the original snippet).
#
# Import train_test_split to split the data
# (it lived in sklearn.cross_validation in old scikit-learn releases).
# from sklearn.model_selection import train_test_split
#
# Hold out 25% of the samples as the test set, using a fixed random seed.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.25, random_state=33)
#
# Import the standardization module from sklearn.preprocessing.
# from sklearn.preprocessing import StandardScaler
#
# Standardize the training and test features; the scaler is fitted on the
# training split only and then applied to the test split.
# ss = StandardScaler()
# X_train = ss.fit_transform(X_train)
# X_test = ss.transform(X_test)

# Tune these parameters for your own problem.
# NOTE(review): newer XGBoost releases replace "silent" with "verbosity" --
# confirm against the installed version before changing it.
xgb_params = {
    "booster": "gbtree",
    "objective": "binary:logistic",
    "eta": 0.01,
    "max_depth": 5,
    "silent": 0,
    "colsample_bytree": 0.7,
}
num_rounds = 1000

# X_train / y_train must be defined before this point (see the note above).
dtrain = xgb.DMatrix(X_train, label=y_train)
gbdt = xgb.train(xgb_params, dtrain, num_rounds)

# get_fscore() returns a {feature_name: score} mapping; sort it into a list of
# (feature_name, score) pairs in ascending score order, so the highest-scoring
# features come last.
importance = gbdt.get_fscore()
importance = sorted(importance.items(), key=operator.itemgetter(1))

# Parenthesized form prints the same list under both Python 2 and Python 3.
print(importance)
原创粉丝点击