Kaggle Series (Part 1): The Titanic Beginner Competition
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from collections import Counter

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, \
    ExtraTreesClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold, learning_curve
In [2]:
train = pd.read_csv("C:/Code/Kaggle/Titanic/train.csv")
test = pd.read_csv("C:/Code/Kaggle/Titanic/test.csv")
IDtest = test["PassengerId"]
In [3]:
def detect_outliers(df, n, features):
    """Tukey's IQR rule: return the indices of rows with more than n outlying features."""
    outlier_indices = []
    for col in features:
        # nanpercentile ignores missing values; plain np.percentile would return
        # NaN for Age and silently flag nothing in that column
        Q1 = np.nanpercentile(df[col], 25)
        Q3 = np.nanpercentile(df[col], 75)
        IQR = Q3 - Q1
        outlier_step = 1.5 * IQR
        outlier_list_col = df[(df[col] < Q1 - outlier_step) | (df[col] > Q3 + outlier_step)].index
        outlier_indices.extend(outlier_list_col)
    outlier_indices = Counter(outlier_indices)
    multiple_outliers = list(k for k, v in outlier_indices.items() if v > n)
    return multiple_outliers

Outliers_to_drop = detect_outliers(train, 2, ["Age", "SibSp", "Parch", "Fare"])
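For intuition, the Tukey fence for a single column can be written directly with pandas quantiles (a minimal sketch, equivalent to one iteration of the loop above; fare_outliers is a hypothetical name):

    q1, q3 = train["Fare"].quantile([0.25, 0.75])   # quartiles; pandas skips NaN
    step = 1.5 * (q3 - q1)                          # Tukey's 1.5 * IQR fence
    fare_outliers = train[(train["Fare"] < q1 - step) | (train["Fare"] > q3 + step)].index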
In [4]:
train.loc[Outliers_to_drop]
Out[4]:
In [5]:
train = train.drop(Outliers_to_drop,axis=0).reset_index(drop=True)
In [6]:
train_len = len(train)
dataset = pd.concat([train, test], axis=0).reset_index(drop=True)
dataset.tail()
Out[6]:
In [7]:
# dataset = dataset.fillna(np.nan)
dataset.isnull().sum()
Out[7]:
In [8]:
train.info()
train.isnull().sum()
Out[8]:
In [9]:
train.describe()
Out[9]:
In [10]:
g = sns.heatmap(train[["Survived","SibSp","Parch","Age","Fare"]].corr(),
                annot=True, fmt=".2f", cmap="coolwarm")
In [11]:
g = sns.factorplot(x="SibSp",y="Survived",data=train,kind="bar")g = g.set_ylabels("survival probability")
In [12]:
g = sns.factorplot(x="Parch",y="Survived",data=train,kind="bar")g = g.set_ylabels("survival probability")
In [13]:
g = sns.kdeplot(train["Age"][(train["Survived"] == 0) & (train["Age"].notnull())], color="Red", shade=True)
g = sns.kdeplot(train["Age"][(train["Survived"] == 1) & (train["Age"].notnull())], ax=g, color="Blue", shade=True)
g.set_xlabel("Age")
g.set_ylabel("Frequency")
g = g.legend(["Not Survived", "Survived"])
In [14]:
# Explore Age vs Sex, Parch, Pclass and SibSp
g = sns.factorplot(y="Age", x="Sex", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="Sex", hue="Pclass", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="Parch", data=dataset, kind="box")
g = sns.factorplot(y="Age", x="SibSp", data=dataset, kind="box")
In [15]:
# Convert Sex into a categorical value: 0 for male, 1 for female
dataset["Sex"] = dataset["Sex"].map({"male": 0, "female": 1})
g = sns.heatmap(dataset[["Age","Sex","SibSp","Parch","Pclass"]].corr(), cmap="coolwarm", annot=True)
In [16]:
# Fill missing Age values with the median age of rows that share the same
# Pclass, Parch and SibSp, falling back to the overall median.
index_NaN_age = list(dataset["Age"][dataset["Age"].isnull()].index)
for i in index_NaN_age:
    age_med = dataset["Age"].median()
    age_pred = dataset["Age"][((dataset['SibSp'] == dataset.iloc[i]["SibSp"]) &
                               (dataset['Parch'] == dataset.iloc[i]["Parch"]) &
                               (dataset['Pclass'] == dataset.iloc[i]["Pclass"]))].median()
    if not np.isnan(age_pred):
        dataset['Age'].iloc[i] = age_pred
    else:
        dataset['Age'].iloc[i] = age_med

dataset.tail()
Out[16]:
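The same imputation can be done without an explicit loop via a grouped transform (a sketch, equivalent in spirit to the cell above; age_med and group_med are hypothetical names):

    age_med = dataset["Age"].median()
    group_med = dataset.groupby(["SibSp", "Parch", "Pclass"])["Age"].transform("median")
    # use the group median where one exists, otherwise the global median
    dataset["Age"] = dataset["Age"].fillna(group_med).fillna(age_med)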
In [17]:
g = sns.factorplot(x="Survived", y = "Age",data = train, kind="box")g = sns.factorplot(x="Survived", y = "Age",data = train, kind="violin")
In [18]:
# Fill Fare missing values with the median value
dataset["Fare"] = dataset["Fare"].fillna(dataset["Fare"].median())
g = sns.distplot(dataset["Fare"], color="m")
g = g.legend(loc="best")
In [19]:
# Apply a log transform to Fare to reduce the skewness of its distribution
dataset["Fare"] = dataset["Fare"].map(lambda i: np.log(i) if i > 0 else 0)
g = sns.distplot(dataset["Fare"], color="b")
g = g.legend(loc="best")
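Instead of the conditional map above one could use np.log1p, which computes log(1 + x) and is defined at zero, so the zero-fare special case disappears (a sketch; its values differ slightly from log(x), so the FareBand cut points found later would shift):

    dataset["Fare"] = np.log1p(dataset["Fare"])   # hypothetical replacement for the map above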
In [20]:
g = sns.factorplot(x="Sex",y="Survived",data=train,kind="bar")g = g.set_ylabels("Survival Probability")
In [21]:
train[["Sex","Survived"]].groupby('Sex').mean()
Out[21]:
In [22]:
g = sns.factorplot(x="Pclass",y="Survived",data=train,kind="bar", size = 6 , palette = "muted")g = g.set_ylabels("survival probability")
In [23]:
g = sns.factorplot(x="Pclass", y="Survived", hue="Sex", data=train, size=6, kind="bar", palette="muted")g = g.set_ylabels("survival probability")
In [24]:
# Fill missing Embarked values with 'S', the most frequent value
dataset["Embarked"] = dataset["Embarked"].fillna("S")
g = sns.factorplot(x="Embarked", y="Survived", data=train, size=6, kind="bar", palette="muted")
g = g.set_ylabels("survival probability")
In [25]:
# Explore Pclass vs Embarked
g = sns.factorplot("Pclass", col="Embarked", data=train, size=6, kind="count", palette="muted")
g = g.set_ylabels("Count")
In [26]:
dataset["Name"].head()
Out[26]:
In [27]:
# Get Title from Name
dataset_title = [i.split(",")[1].split(".")[0].strip() for i in dataset["Name"]]
dataset["Title"] = pd.Series(dataset_title)
dataset["Title"].head()
Out[27]:
In [28]:
g = sns.countplot(x="Title",data=dataset)g = plt.setp(g.get_xticklabels(), rotation=45)
In [29]:
# Group rare titles together, then map Title to ordinal codes
dataset["Title"] = dataset["Title"].replace(['Lady', 'the Countess', 'Countess', 'Capt', 'Col', 'Don', 'Dr',
                                             'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
dataset["Title"] = dataset["Title"].map({"Master": 0, "Miss": 1, "Ms": 1, "Mme": 1, "Mlle": 1, "Mrs": 1,
                                         "Mr": 2, "Rare": 3})
dataset["Title"] = dataset["Title"].astype(int)
In [30]:
g = sns.countplot(dataset["Title"])
g = g.set_xticklabels(["Master", "Miss/Ms/Mme/Mlle/Mrs", "Mr", "Rare"])
In [31]:
g = sns.factorplot(x="Title",y="Survived",data=dataset[:train_len],kind="bar")g = g.set_xticklabels(["Master","Miss-Mrs","Mr","Rare"])g = g.set_ylabels("survival probability")
In [32]:
# Drop the Name variable
# (with inplace=True, drop returns None; with the default inplace=False it returns the modified DataFrame)
dataset.drop(labels=["Name"], axis=1, inplace=True)
In [33]:
# Convert Title into indicator (dummy) columns
Title_dummies = pd.get_dummies(dataset['Title'], prefix='Title')
dataset = dataset.join(Title_dummies).drop(['Title'], axis=1)
# dataset = pd.get_dummies(dataset, columns=["Title"])
# Drop the dummy column with the lowest survival rate; one dummy is redundant
# because it is implied by the others
dataset.drop(['Title_3'], axis=1, inplace=True)
In [34]:
# Create a family size descriptor from SibSp and Parch
dataset["Fsize"] = dataset["SibSp"] + dataset["Parch"] + 1
g = sns.factorplot(x="Fsize", y="Survived", data=dataset)
g = g.set_ylabels("Survival Probability")
In [35]:
# Create new features from family size
dataset['Single'] = dataset['Fsize'].map(lambda s: 1 if s == 1 else 0)
dataset['SmallF'] = dataset['Fsize'].map(lambda s: 1 if s == 2 else 0)
dataset['MedF'] = dataset['Fsize'].map(lambda s: 1 if 3 <= s <= 4 else 0)
dataset['LargeF'] = dataset['Fsize'].map(lambda s: 1 if s >= 5 else 0)
In [36]:
dataset.drop(['Fsize','SibSp','Parch'],axis=1,inplace=True)
In [37]:
dataset.columns
Out[37]:
In [38]:
dataset[:train_len][['Embarked', 'Survived']].groupby(['Embarked'], as_index=False).mean().sort_values(by='Embarked', ascending=True)
Out[38]:
In [39]:
# dataset = pd.get_dummies(dataset, columns=["Embarked"], prefix="Em")
Embarked_dummies = pd.get_dummies(dataset['Embarked'], prefix='Em')
dataset = dataset.join(Embarked_dummies).drop(['Embarked'], axis=1)
dataset.drop(['Em_S'], axis=1, inplace=True)
In [40]:
dataset.columns
Out[40]:
In [41]:
dataset["Cabin"].head()
Out[41]:
In [42]:
dataset["Cabin"].describe()
Out[42]:
In [43]:
dataset["Cabin"].isnull().sum()
Out[43]:
In [44]:
dataset["Cabin"][dataset["Cabin"].notnull()].head()
Out[44]:
In [45]:
# Replace the Cabin number by the deck letter, or 'X' when Cabin is missing
dataset["Cabin"] = pd.Series([i[0] if not pd.isnull(i) else 'X' for i in dataset['Cabin']])
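The list comprehension above has a vectorized one-line equivalent using pandas string accessors (a sketch):

    dataset["Cabin"] = dataset["Cabin"].str[0].fillna("X")   # deck letter, 'X' for missing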
In [46]:
g = sns.countplot(dataset["Cabin"],order=['A','B','C','D','E','F','G','T','X'])
In [47]:
g = sns.factorplot(y="Survived",x="Cabin",data=dataset[:train_len],kind="bar",order=['A','B','C','D','E','F','G','T','X'])g = g.set_ylabels("Survival Probability")
In [48]:
dataset = pd.get_dummies(dataset, columns = ["Cabin"],prefix="Cabin")dataset.drop(['Cabin_T'],axis=1,inplace=True)
In [49]:
dataset["Ticket"].head()
Out[49]:
In [50]:
## Extract the ticket prefix; purely numeric tickets get 'X'
Ticket = []
for i in list(dataset.Ticket):
    if not i.isdigit():
        Ticket.append(i.replace(".", "").replace("/", "").strip().split(' ')[0])  # take the prefix
    else:
        Ticket.append("X")

dataset["Ticket"] = Ticket
dataset["Ticket"].head()
Out[50]:
In [51]:
dataset[:train_len][['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean().sort_values(by='Ticket', ascending=True)
Out[51]:
In [52]:
dataset = pd.get_dummies(dataset, columns = ["Ticket"], prefix="T")dataset.drop(['T_A4'],axis=1,inplace=True)
In [53]:
dataset[:train_len][['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean().sort_values(by='Pclass', ascending=True)
Out[53]:
In [54]:
# Create categorical values for Pclass
dataset["Pclass"] = dataset["Pclass"].astype("category")
dataset = pd.get_dummies(dataset, columns=["Pclass"], prefix="Pc")
dataset.drop(['Pc_3'], axis=1, inplace=True)
In [55]:
dataset['Age'] = dataset['Age'].astype(int)
dataset['AgeBand'] = pd.cut(dataset['Age'], 5)
dataset[:train_len][['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True)
Out[55]:
In [56]:
dataset.tail()
Out[56]:
In [57]:
dataset.loc[ dataset['Age'] <= 16, 'Age'] = 0
dataset.loc[(dataset['Age'] > 16) & (dataset['Age'] <= 32), 'Age'] = 1
dataset.loc[(dataset['Age'] > 32) & (dataset['Age'] <= 48), 'Age'] = 2
dataset.loc[(dataset['Age'] > 48) & (dataset['Age'] <= 64), 'Age'] = 3
dataset.loc[ dataset['Age'] > 64, 'Age'] = 4

Age_dummies = pd.get_dummies(dataset['Age'], prefix='Age')
dataset = dataset.join(Age_dummies).drop(['Age', 'AgeBand'], axis=1)
dataset.drop(['Age_4'], axis=1, inplace=True)
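The chained .loc assignments above can also be written as a single pd.cut with explicit edges (a sketch of an equivalent binning; the labels 0 to 4 match the codes assigned above):

    dataset["Age"] = pd.cut(dataset["Age"], bins=[-1, 16, 32, 48, 64, np.inf],
                            labels=[0, 1, 2, 3, 4]).astype(int)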
In [58]:
dataset['FareBand'] = pd.cut(dataset['Fare'], 4)
dataset[:train_len][['FareBand', 'Survived']].groupby(['FareBand'], as_index=False).mean().sort_values(by='FareBand', ascending=True)
Out[58]:
In [59]:
dataset.columns
Out[59]:
In [60]:
dataset.loc[ dataset['Fare'] <= 1.56, 'Fare'] = 0
dataset.loc[(dataset['Fare'] > 1.56) & (dataset['Fare'] <= 3.119), 'Fare'] = 1
dataset.loc[(dataset['Fare'] > 3.119) & (dataset['Fare'] <= 4.679), 'Fare'] = 2
dataset.loc[ dataset['Fare'] > 4.679, 'Fare'] = 3

Fare_dummies = pd.get_dummies(dataset['Fare'], prefix='Fare')
dataset = dataset.join(Fare_dummies).drop(['Fare', 'FareBand'], axis=1)
In [61]:
dataset.columns
Out[61]:
In [62]:
dataset.drop(['Fare_0.0'],axis=1,inplace=True)
In [63]:
# Drop useless variables
dataset.drop(labels=["PassengerId"], axis=1, inplace=True)
In [64]:
dataset.head()
Out[64]:
In [65]:
dataset.columns
Out[65]:
In [66]:
## Separate the train and test datasets again
train = dataset[:train_len]
test = dataset[train_len:]
test.drop(labels=["Survived"], axis=1, inplace=True)
In [67]:
## Separate train features and label
train["Survived"] = train["Survived"].astype(int)
Y_train = train["Survived"]
X_train = train.drop(labels=["Survived"], axis=1)
In [68]:
# Cross-validate the models with stratified K-fold cross-validation
kfold = StratifiedKFold(n_splits=10)
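Without shuffle, StratifiedKFold splits the rows in order; for reproducible shuffled folds one would pass a seed (a sketch with a hypothetical kfold_seeded name; the notebook keeps the unshuffled default):

    kfold_seeded = StratifiedKFold(n_splits=10, shuffle=True, random_state=2)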
In [69]:
k_range = [16, 18]
knn_param_grid = {'n_neighbors': k_range}
gridKNN = GridSearchCV(KNeighborsClassifier(), param_grid=knn_param_grid, cv=kfold,
                       scoring="accuracy", n_jobs=-1, verbose=1)
gridKNN.fit(X_train, Y_train)
print(gridKNN.best_estimator_)
print(gridKNN.best_score_)
In [70]:
LR_param_grid = {'penalty': ['l1', 'l2'],
                 'C': [0.001, 0.01, 0.1, 1, 10, 100]}
gridLR = GridSearchCV(LogisticRegression(), param_grid=LR_param_grid, cv=kfold,
                      scoring="accuracy", n_jobs=-1, verbose=1)
gridLR.fit(X_train, Y_train)
print(gridLR.best_estimator_)
print(gridLR.best_score_)
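On recent scikit-learn versions the default lbfgs solver rejects penalty='l1', so the grid above would error out; a sketch pinning a solver that supports both penalties (gridLR_l1 is a hypothetical name):

    gridLR_l1 = GridSearchCV(LogisticRegression(solver="liblinear"),  # liblinear handles l1 and l2
                             param_grid=LR_param_grid, cv=kfold,
                             scoring="accuracy", n_jobs=-1, verbose=1)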
In [71]:
# GaussianNB is already imported in In [1]; bind the instance to a new name
# instead of shadowing the class
gnb = GaussianNB()
gnb.fit(X_train, Y_train)
NB_score = cross_val_score(gnb, X_train, Y_train, cv=kfold, scoring="accuracy").mean()
print(NB_score)
# Not sure why, but this score seems off
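One plausible explanation for the odd score: after the dummy encoding nearly every feature is a 0/1 indicator, and GaussianNB fits a Gaussian likelihood to each of them. A Bernoulli likelihood matches binary features better (a sketch; bnb_score is a hypothetical name):

    from sklearn.naive_bayes import BernoulliNB

    bnb_score = cross_val_score(BernoulliNB(), X_train, Y_train,
                                cv=kfold, scoring="accuracy").mean()
    print(bnb_score)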
In [72]:
C = [0.05, 0.1, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
gamma = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
kernel = ['rbf', 'linear']
SVC_param_grid = {'kernel': kernel, 'C': C, 'gamma': gamma}
gridSVC = GridSearchCV(SVC(), param_grid=SVC_param_grid, cv=kfold,
                       scoring="accuracy", n_jobs=-1, verbose=1)
gridSVC.fit(X_train, Y_train)
print(gridSVC.best_estimator_)
print(gridSVC.best_score_)
In [73]:
test_Survived = pd.Series(gridSVC.best_estimator_.predict(test), name="Survived")
results_SVC = pd.concat([IDtest, test_Survived], axis=1)
results_SVC.to_csv("SVC_predict.csv", index=False)
In [74]:
# RFC parameter tuning
RFC = RandomForestClassifier()

## Search grid for optimal parameters
rf_param_grid = {"n_estimators": [300, 500],
                 "max_depth": [8, 15],
                 "min_samples_split": [2, 5, 10],
                 "min_samples_leaf": [1, 2, 5],
                 "max_features": ['log2', 'sqrt']}

gsRFC = GridSearchCV(RFC, param_grid=rf_param_grid, cv=5, scoring="accuracy", n_jobs=-1, verbose=1)
gsRFC.fit(X_train, Y_train)
RFC_best = gsRFC.best_estimator_

# Best score
gsRFC.best_score_, RFC_best
Out[74]:
In [93]:
test_Survived = pd.Series(RFC_best.predict(test), name="Survived")
results_RFC_best = pd.concat([IDtest, test_Survived], axis=1)
results_RFC_best.to_csv("RFC_best.csv", index=False)
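Grid searches of this size are slow, so it can be worth persisting the tuned estimators between sessions (a sketch using joblib, which ships with scikit-learn's dependencies; the filename is arbitrary):

    import joblib

    joblib.dump(RFC_best, "RFC_best.joblib")        # save the tuned forest
    # RFC_best = joblib.load("RFC_best.joblib")     # reload it in a later session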
In [75]:
# ExtraTrees
ExtC = ExtraTreesClassifier()

## Search grid for optimal parameters
ex_param_grid = {"max_depth": [8, 15],
                 "max_features": ['log2', 'sqrt'],
                 "min_samples_split": [2, 5, 10],
                 "min_samples_leaf": [1, 2, 5],
                 "n_estimators": [300, 500]}

gsExtC = GridSearchCV(ExtC, param_grid=ex_param_grid, cv=5, scoring="accuracy", n_jobs=-1, verbose=1)
gsExtC.fit(X_train, Y_train)
ExtC_best = gsExtC.best_estimator_

# Best score
gsExtC.best_score_, ExtC_best
Out[75]:
In [76]:
# Gradient boosting tuning
GBC = GradientBoostingClassifier()
gb_param_grid = {'learning_rate': [0.1, 0.05, 0.01],
                 'max_depth': [3, 5, 10],
                 'min_samples_leaf': [50, 100, 150],
                 'max_features': ['sqrt', 'log2']}

gsGBC = GridSearchCV(GBC, param_grid=gb_param_grid, cv=5, scoring="accuracy", n_jobs=4, verbose=1)
gsGBC.fit(X_train, Y_train)
GBC_best = gsGBC.best_estimator_

# Best score
gsGBC.best_score_, GBC_best
Out[76]:
In [77]:
import xgboost as xgb
from xgboost.sklearn import XGBClassifier

## Search grid for optimal parameters
xgb_param_grid = {"learning_rate": [0.01, 0.5, 1.0],
                  "n_estimators": [300, 500],
                  "gamma": [0.1, 0.5, 1.0],
                  "max_depth": [3, 5, 10],
                  "min_child_weight": [1, 3],
                  "subsample": [0.8, 1.0],
                  "colsample_bytree": [0.8, 1.0]}

gridxgb = GridSearchCV(XGBClassifier(), param_grid=xgb_param_grid, cv=5,
                       scoring="accuracy", n_jobs=-1, verbose=1)
gridxgb.fit(X_train, Y_train)
gridxgb_best = gridxgb.best_estimator_

# Best score
gridxgb.best_score_
Out[77]:
In [78]:
print(gridxgb_best)
In [92]:
test_Survived = pd.Series(gridxgb_best.predict(test), name="Survived")
results_gridxgb = pd.concat([IDtest, test_Survived], axis=1)
results_gridxgb.to_csv("gridxgb.csv", index=False)
In [79]:
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=-1,
                        train_sizes=np.linspace(.1, 1.0, 5)):
    """Generate a simple plot of the test and training learning curve."""
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
    plt.legend(loc="best")
    return plt

g = plot_learning_curve(gsRFC.best_estimator_, "RF learning curves", X_train, Y_train, cv=kfold)
g = plot_learning_curve(gsExtC.best_estimator_, "ExtraTrees learning curves", X_train, Y_train, cv=kfold)
g = plot_learning_curve(gsGBC.best_estimator_, "GradientBoosting learning curves", X_train, Y_train, cv=kfold)
g = plot_learning_curve(gridxgb.best_estimator_, "XGBoost learning curves", X_train, Y_train, cv=kfold)
In [80]:
nrows = ncols = 2
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex="all", figsize=(15, 15))

names_classifiers = [("ExtraTrees", ExtC_best), ("RandomForest", RFC_best),
                     ("GradientBoosting", GBC_best), ("XGBoost", gridxgb_best)]

nclassifier = 0
for row in range(nrows):
    for col in range(ncols):
        name = names_classifiers[nclassifier][0]
        classifier = names_classifiers[nclassifier][1]
        indices = np.argsort(classifier.feature_importances_)[::-1][:40]
        g = sns.barplot(y=X_train.columns[indices][:40],
                        x=classifier.feature_importances_[indices][:40],
                        orient='h', ax=axes[row][col])
        g.set_xlabel("Relative importance", fontsize=12)
        g.set_ylabel("Features", fontsize=12)
        g.tick_params(labelsize=9)
        g.set_title(name + " feature importance")
        nclassifier += 1
In [81]:
test_Survived_RFC = pd.Series(RFC_best.predict(test), name="RFC")
test_Survived_ExtC = pd.Series(ExtC_best.predict(test), name="ExtC")
test_Survived_GBC = pd.Series(GBC_best.predict(test), name="GBC")
test_Survived_xgb = pd.Series(gridxgb_best.predict(test), name="xgb")

# Concatenate all classifier results
ensemble_results = pd.concat([test_Survived_RFC, test_Survived_ExtC,
                              test_Survived_GBC, test_Survived_xgb], axis=1)
g = sns.heatmap(ensemble_results.corr(), annot=True)
In [82]:
votingC = VotingClassifier(estimators=[('rfc', RFC_best), ('extc', ExtC_best),
                                       ('gbc', GBC_best), ('xgb', gridxgb_best)],
                           voting='soft', n_jobs=-1)
votingC = votingC.fit(X_train, Y_train)
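For comparison, a hard-voting variant takes the majority over predicted class labels instead of averaging class probabilities (a sketch; votingH is a hypothetical name):

    votingH = VotingClassifier(estimators=[('rfc', RFC_best), ('extc', ExtC_best),
                                           ('gbc', GBC_best), ('xgb', gridxgb_best)],
                               voting='hard', n_jobs=-1)
    votingH = votingH.fit(X_train, Y_train)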
In [83]:
test_Survived = pd.Series(votingC.predict(test), name="Survived")
results_votingC = pd.concat([IDtest, test_Survived], axis=1)
results_votingC.to_csv("ensemble_python_voting.csv", index=False)
In [90]:
# First layer: out-of-fold predictions from the tuned base models
class Ensemble_stacking1(object):
    def __init__(self, n_folds, base_models):
        self.n_folds = n_folds
        self.base_models = base_models

    def get_data_to2(self, X, y, T):
        """Build second-layer training data (S_train) and test data (S_test)."""
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)
        # Materialize the folds: .split() returns a generator, which would be
        # exhausted after the first base model and leave the other columns zero
        folds = list(StratifiedKFold(n_splits=self.n_folds, shuffle=True,
                                     random_state=2016).split(X, y))
        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))
        for i, clf in enumerate(self.base_models):
            S_test_i = np.zeros((T.shape[0], self.n_folds))
            for j, (train_idx, test_idx) in enumerate(folds):
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                # y_holdout = y[test_idx]
                clf.fit(X_train, y_train)
                y_pred = clf.predict(X_holdout)[:]
                S_train[test_idx, i] = y_pred
                S_test_i[:, j] = clf.predict(T)[:]
            S_test[:, i] = S_test_i.mean(1)
        return S_train, S_test

# Second layer: XGBoost trained on the first-layer predictions
xgb2_param_grid = {"learning_rate": [0.01, 0.5],
                   "n_estimators": [300, 500],
                   "gamma": [0.1, 0.5, 1.0],
                   "max_depth": [3, 5, 10],
                   "min_child_weight": [1, 3, 5, 7],
                   "subsample": [0.8, 1.0],
                   "colsample_bytree": [0.6, 0.8]}

gridxgb2 = GridSearchCV(XGBClassifier(), param_grid=xgb2_param_grid, cv=5,
                        scoring="accuracy", n_jobs=-1, verbose=1)

S_train, S_test = Ensemble_stacking1(5, [RFC_best, ExtC_best, GBC_best, gridxgb]).get_data_to2(X_train, Y_train, test)
gridxgb2.fit(S_train, Y_train)
gridxgb2_best = gridxgb2.best_estimator_
print(gridxgb2.best_score_)
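A common refinement of this stacker (a sketch, not what the notebook ran): have the first layer emit the positive-class probability rather than the hard 0/1 label, so the second-layer XGBoost gets graded confidence to work with. Inside the inner loop of get_data_to2 this would read (assuming every base model implements predict_proba, which the four used here do):

    y_pred = clf.predict_proba(X_holdout)[:, 1]   # P(Survived = 1) on the held-out fold
    S_train[test_idx, i] = y_pred
    S_test_i[:, j] = clf.predict_proba(T)[:, 1]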
In [91]:
test_Survived = pd.Series(gridxgb2.predict(S_test), name="Survived")
results_stacking = pd.concat([IDtest, test_Survived], axis=1)
results_stacking.to_csv("ensemble_python_stacking.csv", index=False)