Xgboost的简单使用2

来源:互联网 发布:客管家软件好用吗 编辑:程序博客网 时间:2024/05/21 14:43

1、xgboost预测

# XGBoost prediction: train a small booster and score the test set.
import xgboost as xgb

# Read in the training and test data.
dtrain = xgb.DMatrix('demo/data/agaricus.txt.train')
dtest = xgb.DMatrix('demo/data/agaricus.txt.test')

# Specify parameters via a map.
# NOTE(review): newer XGBoost releases replace 'silent' with 'verbosity' --
# confirm against the installed version.
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
num_round = 2
bst = xgb.train(param, dtrain, num_round)

# Make predictions on the test set.
preds = bst.predict(dtest)

知识点:
1. bst = xgb.train(param, dtrain, num_round)

2.

#!/usr/bin/python
# Basic XGBoost walkthrough: train from text files, save/load/pickle the
# model, then rebuild the training DMatrix from scipy.sparse and numpy inputs.
import pickle

import numpy as np
import scipy.sparse
import xgboost as xgb

### simple example
# Load data from text files; DMatrix also accepts a binary buffer generated
# by xgboost.
dtrain = xgb.DMatrix('agaricus.txt.train')
dtest = xgb.DMatrix('agaricus.txt.test')

# Specify parameters via a map; definitions are the same as in the C++ version.
# NOTE(review): newer XGBoost releases replace 'silent' with 'verbosity' --
# confirm against the installed version.
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}

# Specify the validation sets to watch performance on.
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)

# This is prediction: count test rows whose thresholded prediction (> 0.5)
# disagrees with the label, and print the resulting error rate.
preds = bst.predict(dtest)
labels = dtest.get_label()
n_wrong = sum(1 for pred, label in zip(preds, labels) if int(pred > 0.5) != label)
print('error=%f' % (n_wrong / float(len(preds))))
bst.save_model('0001.model')

# Dump the model as text.
bst.dump_model('dump.raw.txt')
# Dump the model with a feature map.
bst.dump_model('dump.nice.txt', 'featmap.txt')

# Save the DMatrix into a binary buffer.
dtest.save_binary('dtest.buffer')
# Save the model.
bst.save_model('xgb.model')

# Load the model and data back in.
bst2 = xgb.Booster(model_file='xgb.model')
dtest2 = xgb.DMatrix('dtest.buffer')
preds2 = bst2.predict(dtest2)
# Assert the reloaded model produces the same predictions.
assert np.sum(np.abs(preds2 - preds)) == 0

# Alternatively, you can pickle the booster.
# The bytes unpickled here were produced a few lines above by this script;
# never call pickle.loads on untrusted data.
pks = pickle.dumps(bst2)
bst3 = pickle.loads(pks)
preds3 = bst3.predict(dtest2)
# Assert the unpickled model produces the same predictions.
assert np.sum(np.abs(preds3 - preds)) == 0

### build dmatrix from scipy.sparse
print('start running example of build DMatrix from scipy.sparse CSR Matrix')
labels = []
row = []
col = []
dat = []
# Each line is "<label> <index>:<value> ..."; collect the entries in
# coordinate form (row index, column index, value).
with open('agaricus.txt.train') as train_file:
    for i, line in enumerate(train_file):
        arr = line.split()
        labels.append(int(arr[0]))
        for it in arr[1:]:
            k, v = it.split(':')
            row.append(i)
            col.append(int(k))
            dat.append(float(v))
csr = scipy.sparse.csr_matrix((dat, (row, col)))
dtrain = xgb.DMatrix(csr, label=labels)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
bst = xgb.train(param, dtrain, num_round, watchlist)

print('start running example of build DMatrix from scipy.sparse CSC Matrix')
# We can also construct from a CSC matrix.
csc = scipy.sparse.csc_matrix((dat, (row, col)))
dtrain = xgb.DMatrix(csc, label=labels)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
bst = xgb.train(param, dtrain, num_round, watchlist)

print('start running example of build DMatrix from numpy array')
# NOTE: npymat is a numpy array; per the original demo's note it is converted
# into scipy.sparse.csr_matrix in the internal implementation and then
# converted to DMatrix.
npymat = csr.todense()
dtrain = xgb.DMatrix(npymat, label=labels)
watchlist = [(dtest, 'eval'), (dtrain, 'train')]
bst = xgb.train(param, dtrain, num_round, watchlist)

参考:

  1. 官方demo