sklearn pipeline简介

1. 直接调用fit和predict方法来对pipeline中的所有算法模型进行训练和预测。
2. 可以结合grid search对pipeline中各个步骤的参数进行选择。


>>> from sklearn.pipeline import Pipeline
>>> from sklearn.svm import SVC
>>> from sklearn.decomposition import PCA
>>> estimators = [('reduce_dim', PCA()), ('svm', SVC())]
>>> clf = Pipeline(estimators)
>>> clf
Pipeline(steps=[('reduce_dim', PCA(copy=True, n_components=None,
    whiten=False)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False))])


>>> clf.set_params(svm__C=10)



>>> from sklearn import svm
>>> from sklearn.datasets import samples_generator
>>> from sklearn.feature_selection import SelectKBest
>>> from sklearn.feature_selection import f_regression
>>> from sklearn.pipeline import Pipeline
>>> # generate some data to play with
>>> X, y = samples_generator.make_classification(
...     n_informative=5, n_redundant=0, random_state=42)
>>> # ANOVA SVM-C
>>> anova_filter = SelectKBest(f_regression, k=5)
>>> clf = svm.SVC(kernel='linear')
>>> anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
>>> # You can set the parameters using the names issued
>>> # For instance, fit using a k of 10 in the SelectKBest
>>> # and a parameter 'C' of the svm
>>> anova_svm.set_params(anova__k=10, svc__C=.1).fit(X, y)
...
Pipeline(steps=[...])
>>> prediction = anova_svm.predict(X)
>>> anova_svm.score(X, y)
0.77...
>>> # getting the selected features chosen by anova_filter
>>> anova_svm.named_steps['anova'].get_support()
...
array([ True,  True,  True, False, False,  True, False,  True,  True, True,
       False, False,  True, False,  True, False, False, False, False,
       True], dtype=bool)
