Kaggle 上的 Ensemble(stacking 集成)类,抄录一份备用

来源:互联网 发布:ueditor java图片上传 编辑:程序博客网 时间:2024/05/22 06:14

来源:https://www.kaggle.com/eikedehling/stack-of-svm-elasticnet-xgboost-rf-0-55/code

class Ensemble(object):
    """Two-level stacking ensemble.

    Every base model is fitted on K-fold training splits. Its predictions on
    the held-out part of each fold become one column of the stacker's
    training matrix, and its predictions on the test data, averaged over the
    folds, become the matching column of the stacker's test matrix.
    """

    def __init__(self, n_splits, stacker, base_models, random_state=2016):
        """Store the ensemble configuration.

        Args:
            n_splits: Number of K-fold splits used for the base models.
            stacker: Second-level estimator; must provide ``fit`` and
                ``predict``.
            base_models: Sized sequence of first-level estimators, each
                providing ``fit`` and ``predict``.
            random_state: Seed passed to the shuffling ``KFold``. Defaults
                to 2016, the value that used to be hard-coded.
        """
        self.n_splits = n_splits
        self.stacker = stacker
        self.base_models = base_models
        self.random_state = random_state

    def fit_predict(self, X, y, T):
        """Fit base models and stacker, then return stacker predictions for T.

        Prints the R^2 score of every (model, fold) pair on its held-out
        fold as a side effect.

        Args:
            X: Training features; converted with ``np.array``.
            y: Training targets; converted with ``np.array``.
            T: Test features; converted with ``np.array``.

        Returns:
            Whatever ``stacker.predict`` returns for the stacked test matrix.
        """
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        # Materialise the folds once so every base model sees the same splits.
        splitter = KFold(
            n_splits=self.n_splits,
            shuffle=True,
            random_state=self.random_state,
        )
        folds = list(splitter.split(X, y))

        n_models = len(self.base_models)
        S_train = np.zeros((X.shape[0], n_models))
        S_test = np.zeros((T.shape[0], n_models))

        for i, clf in enumerate(self.base_models):
            # One column of test predictions per fold; averaged afterwards.
            S_test_i = np.zeros((T.shape[0], self.n_splits))

            for j, (train_idx, test_idx) in enumerate(folds):
                X_fit = X[train_idx]
                y_fit = y[train_idx]
                X_holdout = X[test_idx]
                y_holdout = y[test_idx]

                clf.fit(X_fit, y_fit)

                # ravel() keeps estimators that return (n, 1) arrays from
                # failing on the 1-D column assignments below.
                y_pred = np.ravel(clf.predict(X_holdout))
                print("Model %d fold %d score %f" % (i, j, r2_score(y_holdout, y_pred)))

                S_train[test_idx, i] = y_pred
                S_test_i[:, j] = np.ravel(clf.predict(T))

            S_test[:, i] = S_test_i.mean(axis=1)

        # NOTE: to estimate the stacker's own score, cross-validate it on
        # (S_train, y) at this point, e.g. cross_val_score(..., scoring='r2').
        self.stacker.fit(S_train, y)
        return self.stacker.predict(S_test)


# svm_pipe, en_pipe, xgb_pipe, rf_model, train, y_train and test are defined
# outside this snippet.
stack = Ensemble(
    n_splits=5,
    # Alternative stacker: ElasticNetCV(l1_ratio=[x/10.0 for x in range(1,10)])
    stacker=ElasticNet(l1_ratio=0.1, alpha=1.4),
    base_models=(svm_pipe, en_pipe, xgb_pipe, rf_model),
)
y_test = stack.fit_predict(train, y_train, test)