randomforest&GradientBoosting

来源:互联网 发布:三国杀张鲁淘宝价格 编辑:程序博客网 时间:2024/06/05 08:42

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

"""
Created on Tue Mar 14 14:39:19 2017

@author: dreamer
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

def plot_feature_importances_cancer(model):
    """Draw a horizontal bar chart of a fitted model's feature importances.

    One bar is drawn per feature of the module-level ``cancer`` dataset,
    labelled with the matching entry of ``cancer.feature_names``. The chart
    is drawn with ``matplotlib.pyplot``; the function does not call
    ``plt.show()``.

    Args:
        model: A fitted estimator exposing ``feature_importances_`` with one
            value per column of ``cancer.data``.

    Returns:
        None.
    """
    # Number of columns in the dataset == number of bars to draw.
    n_features = cancer.data.shape[1]
    plt.barh(range(n_features), model.feature_importances_, align='center')
    # Replace the numeric tick positions with the human-readable feature names.
    plt.yticks(np.arange(n_features), cancer.feature_names)
    plt.xlabel("Feature importance")
    plt.ylabel("Feature")

# Load the breast-cancer dataset and split it into train and test parts.
# The split is stratified on the target and uses a fixed random_state so
# that repeated runs produce the same partition.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=0)

# LogisticRegression

# Baseline: logistic regression with default hyper-parameters.
logreg = LogisticRegression().fit(X_train, y_train)
print("Training set score: {:.3f}".format(logreg.score(X_train, y_train)))
print("Test set score: {:.3f}".format(logreg.score(X_test, y_test)))
# Disabled experiment (kept inside a string literal so it does not run):
# fits one random forest for each n_estimators in 1..199, records the
# train/test accuracy of each, and plots both curves.
'''
train = []
test = []
for i in range(1, 200):

    forest = RandomForestClassifier(
        n_estimators=i, random_state=0).fit(X_train, y_train)
    train.append(forest.score(X_train, y_train))
    test.append(forest.score(X_test, y_test))

plt.plot(train)
plt.plot(test)
'''

# RandomForest

# Random forest with 100 trees, at most 6 candidate features per split,
# a fixed seed, and n_jobs=-1 to use all available cores.
forest = RandomForestClassifier(
    n_estimators=100, random_state=0, n_jobs=-1,
    max_features=6).fit(X_train, y_train)
print("Training set score: {:.3f}".format(forest.score(X_train, y_train)))
print("Test set score: {:.3f}".format(forest.score(X_test, y_test)))
# Disabled (string literal, not executed): plot the forest's feature importances.
'''feature=plot_feature_importances_cancer(forest)'''

# Disabled snippet (string literal, not executed): export a decision tree to
# Graphviz "tree.dot".
# NOTE(review): `tree` is not defined anywhere in the visible script; a fitted
# tree estimator would have to be created before enabling this.
'''
from sklearn.tree import export_graphviz
export_graphviz(tree, out_file="tree.dot", class_names=["malignant", "benign"],
                feature_names=cancer.feature_names, impurity=False, filled=True)
'''

# GradientBoostingClassifier

from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees: depth-3 trees, learning rate 0.02, fixed seed.
gbrt = GradientBoostingClassifier(
    random_state=0, max_depth=3, learning_rate=0.02).fit(X_train, y_train)
print("Accuracy on training set: {:.3f}".format(gbrt.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(gbrt.score(X_test, y_test)))

0 0