xgboost API: XGBClassifier

xgboost.sklearn.XGBClassifier = class XGBClassifier(XGBModel, sklearn.base.ClassifierMixin)
 |  Implementation of the scikit-learn API for XGBoost classification.
 |  
 |  Parameters
 |  ----------
 |  max_depth : int
 |      Maximum tree depth for base learners.
 |  learning_rate : float
 |      Boosting learning rate (xgb's "eta").
 |  n_estimators : int
 |      Number of boosted trees to fit.
 |  silent : boolean
 |      Whether to suppress messages while running boosting.
 |  objective : string or callable
 |      Specify the learning task and the corresponding learning objective or
 |      a custom objective function to be used (see note below).
 |  nthread : int
 |      Number of parallel threads used to run xgboost.
 |  gamma : float
 |      Minimum loss reduction required to make a further partition on a leaf node of the tree.
 |  min_child_weight : int
 |      Minimum sum of instance weight(hessian) needed in a child.
 |  max_delta_step : int
 |      Maximum delta step we allow each tree's weight estimation to be.
 |  subsample : float
 |      Subsample ratio of the training instances.
 |  colsample_bytree : float
 |      Subsample ratio of columns when constructing each tree.
 |  colsample_bylevel : float
 |      Subsample ratio of columns for each split, in each level.
 |  reg_alpha : float (xgb's alpha)
 |      L1 regularization term on weights.
 |  reg_lambda : float (xgb's lambda)
 |      L2 regularization term on weights.
 |  scale_pos_weight : float
 |      Balancing of positive and negative weights.
 |  base_score : float
 |      The initial prediction score of all instances, global bias.
 |  seed : int
 |      Random number seed.
 |  missing : float, optional
 |      Value in the data to be treated as missing. If
 |      None, defaults to np.nan.
 |  
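Before the custom-objective note, a minimal usage sketch (not part of the original docstring; the data is synthetic and X_train/y_train are placeholder names):

    import numpy as np
    from xgboost import XGBClassifier

    # toy data, purely illustrative
    X_train = np.random.rand(100, 4)
    y_train = np.random.randint(0, 2, size=100)

    # parameter names follow the (older) signature documented above
    clf = XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=100,
                        objective='binary:logistic', seed=0)
    clf.fit(X_train, y_train)
    preds = clf.predict(X_train)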
 |  Note
 |  ----
 |  A custom objective function can be provided for the ``objective``
 |  parameter. In this case, it should have the signature
 |  ``objective(y_true, y_pred) -> grad, hess``:
 |  
 |  y_true: array_like of shape [n_samples]
 |      The target values.
 |  y_pred: array_like of shape [n_samples]
 |      The predicted values.
 |  
 |  grad: array_like of shape [n_samples]
 |      The value of the gradient for each sample point.
 |  hess: array_like of shape [n_samples]
 |      The value of the second derivative for each sample point.
 |  
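To make the note concrete, a sketch (not from the original docs) of a hand-written binary logistic objective. In this wrapper a callable objective receives the raw margin as y_pred, so the sigmoid is applied inside the function:

    import numpy as np
    from xgboost import XGBClassifier

    def logistic_obj(y_true, y_pred):
        # y_pred is the raw (untransformed) margin score
        p = 1.0 / (1.0 + np.exp(-y_pred))
        grad = p - y_true         # first derivative of the log loss
        hess = p * (1.0 - p)      # second derivative of the log loss
        return grad, hess

    clf = XGBClassifier(objective=logistic_obj)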
 |  Method resolution order:
 |      XGBClassifier
 |      XGBModel
 |      sklearn.base.BaseEstimator
 |      sklearn.base.ClassifierMixin
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, max_depth=3, learning_rate=0.1, n_estimators=100, silent=True, objective='binary:logistic', nthread=-1, gamma=0, min_child_weight=1, max_delta_step=0, subsample=1, colsample_bytree=1, colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, seed=0, missing=None)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  evals_result(self)
 |      Return the evaluation results.
 |      
 |      If eval_set is passed to the `fit` function, you can call evals_result() to
 |      get evaluation results for all passed eval_sets. When eval_metric is also
 |      passed to the `fit` function, the evals_result will contain the eval_metrics
 |      passed to the `fit` function.
 |      
 |      Returns
 |      -------
 |      evals_result : dictionary
 |      
 |      Example
 |      -------
 |      param_dist = {'objective':'binary:logistic', 'n_estimators':2}
 |      
 |      clf = xgb.XGBClassifier(**param_dist)
 |      
 |      clf.fit(X_train, y_train,
 |              eval_set=[(X_train, y_train), (X_test, y_test)],
 |              eval_metric='logloss',
 |              verbose=True)
 |      
 |      evals_result = clf.evals_result()
 |      
 |      The variable evals_result will contain:
 |      {'validation_0': {'logloss': ['0.604835', '0.531479']},
 |       'validation_1': {'logloss': ['0.41965', '0.17686']}}
 |  
 |  fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True)
 |      Fit gradient boosting classifier
 |      
 |      Parameters
 |      ----------
 |      X : array_like
 |          Feature matrix.
 |      y : array_like
 |          Labels.
 |      sample_weight : array_like
 |          Weight for each instance.
 |      eval_set : list, optional
 |          A list of (X, y) pairs to use as a validation set for
 |          early-stopping
 |      eval_metric : str, callable, optional
 |          If a str, should be a built-in evaluation metric to use. See
 |          doc/parameter.md. If callable, a custom evaluation metric. The call
 |          signature is func(y_predicted, y_true) where y_true will be a
 |          DMatrix object such that you may need to call the get_label
 |          method. It must return a (str, value) pair where the str is a name
 |          for the evaluation and value is the value of the evaluation
 |          function. The metric is always minimized.
 |      early_stopping_rounds : int, optional
 |          Activates early stopping. Validation error needs to decrease at
 |          least every <early_stopping_rounds> round(s) to continue training.
 |          Requires at least one item in evals.  If there's more than one,
 |          will use the last. Returns the model from the last iteration
 |          (not the best one). If early stopping occurs, the model will
 |          have three additional fields: bst.best_score, bst.best_iteration
 |          and bst.best_ntree_limit.
 |          (Use bst.best_ntree_limit to get the correct value if num_parallel_tree
 |          and/or num_class appears in the parameters)
 |      verbose : bool
 |          If `verbose` and an evaluation set is used, writes the evaluation
 |          metric measured on the validation set to stderr.
 |  
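A sketch of fit with early stopping (not from the original docs); X_train/y_train and X_valid/y_valid are placeholders for your own train/validation split:

    clf = XGBClassifier(n_estimators=500, seed=0)
    clf.fit(X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            eval_metric='logloss',
            early_stopping_rounds=10,
            verbose=False)

    # attributes set by fit when early_stopping_rounds is passed
    print(clf.best_score, clf.best_iteration, clf.best_ntree_limit)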
 |  predict(self, data, output_margin=False, ntree_limit=0)
 |  
 |  predict_proba(self, data, output_margin=False, ntree_limit=0)
 |  
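These two methods carry no docstring here. A usage sketch (X_test is a placeholder, clf a fitted classifier); with ntree_limit=0 all trees are used, while best_ntree_limit from an early-stopped fit restricts prediction to the trees up to the best iteration:

    labels = clf.predict(X_test)
    proba = clf.predict_proba(X_test)    # shape (n_samples, n_classes)
    proba_best = clf.predict_proba(X_test, ntree_limit=clf.best_ntree_limit)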
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  feature_importances_
 |      Returns
 |      -------
 |      feature_importances_ : array of shape = [n_features]
 |  
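A short sketch (assumes a fitted clf) ranking features by importance:

    import numpy as np

    importances = clf.feature_importances_    # one value per input column
    for idx in np.argsort(importances)[::-1]:
        print('feature %d: %.4f' % (idx, importances[idx]))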
 |  ----------------------------------------------------------------------
 |  Methods inherited from XGBModel:
 |  
 |  __setstate__(self, state)
 |  
 |  apply(self, X, ntree_limit=0)
 |      Return the predicted leaf index of every tree for each sample.
 |      
 |      Parameters
 |      ----------
 |      X : array_like, shape=[n_samples, n_features]
 |          Input features matrix.
 |      
 |      ntree_limit : int
 |          Limit number of trees in the prediction; defaults to 0 (use all trees).
 |      
 |      Returns
 |      -------
 |      X_leaves : array_like, shape=[n_samples, n_trees]
 |          For each datapoint x in X and for each tree, return the index of the
 |          leaf x ends up in. Leaves are numbered within
 |          ``[0; 2**(self.max_depth+1))``, possibly with gaps in the numbering.
 |  
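A sketch of apply (assumes a fitted clf). The returned leaf indices are commonly used as categorical features for a downstream model:

    leaves = clf.apply(X_train)    # shape (n_samples, n_trees)
    # leaf index the first sample falls into, in the first tree
    print(leaves[0, 0])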
 |  booster(self)
 |      Get the underlying xgboost Booster of this model.
 |      
 |      This will raise an exception if fit has not been called.
 |      
 |      Returns
 |      -------
 |      booster : an xgboost Booster of the underlying model
 |  
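A sketch of dropping down to the raw Booster (note that booster is a method in this version, not a property), which exposes Booster-level APIs such as save_model and get_fscore:

    bst = clf.booster()            # assumes clf has been fit
    bst.save_model('xgb.model')    # persist in xgboost's binary format
    print(bst.get_fscore())        # per-feature split counts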
 |  get_params(self, deep=False)
 |      Get parameters.
 |  
 |  get_xgb_params(self)
 |      Get xgboost type parameters.
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.BaseEstimator:
 |  
 |  __getstate__(self)
 |  
 |  __repr__(self)
 |      Return repr(self).
 |  
 |  set_params(self, **params)
 |      Set the parameters of this estimator.
 |      
 |      The method works on simple estimators as well as on nested objects
 |      (such as pipelines). The latter have parameters of the form
 |      ``<component>__<parameter>`` so that it's possible to update each
 |      component of a nested object.
 |      
 |      Returns
 |      -------
 |      self
 |  
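A sketch of set_params, including the nested <component>__<parameter> form inside a scikit-learn Pipeline (the step name 'xgb' is arbitrary):

    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from xgboost import XGBClassifier

    clf = XGBClassifier()
    clf.set_params(max_depth=5, n_estimators=200)

    pipe = Pipeline([('scale', StandardScaler()), ('xgb', XGBClassifier())])
    pipe.set_params(xgb__max_depth=5)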
 |  ----------------------------------------------------------------------
 |  Data descriptors inherited from sklearn.base.BaseEstimator:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from sklearn.base.ClassifierMixin:
 |  
 |  score(self, X, y, sample_weight=None)
 |      Returns the mean accuracy on the given test data and labels.
 |      
 |      In multi-label classification, this is the subset accuracy
 |      which is a harsh metric since you require for each sample that
 |      each label set be correctly predicted.
 |      
 |      Parameters
 |      ----------
 |      X : array-like, shape = (n_samples, n_features)
 |          Test samples.
 |      
 |      y : array-like, shape = (n_samples) or (n_samples, n_outputs)
 |          True labels for X.
 |      
 |      sample_weight : array-like, shape = [n_samples], optional
 |          Sample weights.
 |      
 |      Returns
 |      -------
 |      score : float
 |          Mean accuracy of self.predict(X) wrt. y.
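A usage sketch (X_test/y_test are placeholders; clf is a fitted classifier): score is simply the accuracy of predict:

    from sklearn.metrics import accuracy_score

    acc = clf.score(X_test, y_test)
    # equivalent computation
    acc2 = accuracy_score(y_test, clf.predict(X_test))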