Kaggle: Titanic

来源:互联网 发布:淘宝9.9抢购 编辑:程序博客网 时间:2024/04/27 18:00

所需数据(train.csv、test.csv、gender_submission.csv)下载地址:https://www.kaggle.com/c/titanic/data

# -*- coding:utf-8 -*-
"""Kaggle Titanic baseline: logistic regression with and without ``Age``.

Instead of imputing missing ages, two models are trained: one on the
passengers whose ``Age`` is known (using ``Age`` as a feature) and one on the
passengers whose ``Age`` is missing (without it). Each test passenger is
scored by the model that matches whether their ``Age`` is present.

Run as a script, this reads ``train.csv``, ``test.csv`` and
``gender_submission.csv`` from the working directory, writes
``predicted.csv`` (columns ``PassengerId`` and ``predict``) and prints the
fraction of predictions that agree with ``gender_submission.csv``.
"""
import numpy as np
import pandas as pd

# Numeric codes used for the categorical columns.
SEX_CODES = {"male": 0, "female": 1}
EMBARKED_CODES = {"C": -1, "Q": 0, "S": 1}
# Port substituted for a missing ``Embarked`` value.
DEFAULT_EMBARKED = "S"

FEATURES_WITH_AGE = ["Pclass", "Sex", "Age", "Embarked", "Family_size"]
FEATURES_WITHOUT_AGE = ["Pclass", "Sex", "Embarked", "Family_size"]


def encode_features(frame):
    """Return a copy of *frame* with the model's derived columns.

    ``Sex`` and ``Embarked`` are replaced by their numeric codes and
    ``Family_size`` is added as ``SibSp + Parch``. Missing ``Embarked``
    values are filled with ``DEFAULT_EMBARKED`` *before* encoding, so the
    column is fully numeric afterwards. Values that have no code in
    ``SEX_CODES`` / ``EMBARKED_CODES`` become NaN. The input frame is not
    modified.
    """
    encoded = frame.copy()
    encoded["Sex"] = encoded["Sex"].map(SEX_CODES)
    embarked = encoded["Embarked"].fillna(DEFAULT_EMBARKED)
    encoded["Embarked"] = embarked.map(EMBARKED_CODES)
    encoded["Family_size"] = encoded["SibSp"] + encoded["Parch"]
    return encoded


def accuracy(predicted, expected):
    """Return ``1 - mean(|predicted - expected|)`` as a float.

    For 0/1 labels this is the fraction of positions where the two
    sequences agree.

    Raises:
        ValueError: if *expected* is empty.
    """
    predicted_values = np.asarray(predicted, dtype=float)
    expected_values = np.asarray(expected, dtype=float)
    if expected_values.size == 0:
        raise ValueError("cannot compute accuracy of an empty sequence")
    mismatch = np.abs(predicted_values - expected_values).sum()
    return float(1 - mismatch / expected_values.size)


def main():
    """Train both models, write ``predicted.csv`` and print the accuracy."""
    # Imported here so the helpers above can be imported without
    # scikit-learn being installed.
    from sklearn.linear_model import LogisticRegression

    titanic_train = encode_features(pd.read_csv("train.csv"))
    titanic_test = encode_features(pd.read_csv("test.csv"))
    titanic_gender = pd.read_csv("gender_submission.csv")
    # Aligned on the row index, exactly as the two files are read.
    titanic_test["gender"] = titanic_gender["Survived"]

    train_has_age = titanic_train["Age"].notnull()
    test_has_age = titanic_test["Age"].notnull()

    # Model for passengers with a known age.
    classifier = LogisticRegression()
    classifier.fit(
        titanic_train.loc[train_has_age, FEATURES_WITH_AGE],
        titanic_train.loc[train_has_age, "Survived"],
    )
    # Model for passengers whose age is missing.
    classifier_noage = LogisticRegression()
    classifier_noage.fit(
        titanic_train.loc[~train_has_age, FEATURES_WITHOUT_AGE],
        titanic_train.loc[~train_has_age, "Survived"],
    )

    # Integer placeholder; every row is overwritten by one of the models.
    titanic_test["predict"] = 0
    if test_has_age.any():
        titanic_test.loc[test_has_age, "predict"] = classifier.predict(
            titanic_test.loc[test_has_age, FEATURES_WITH_AGE]
        )
    if (~test_has_age).any():
        titanic_test.loc[~test_has_age, "predict"] = classifier_noage.predict(
            titanic_test.loc[~test_has_age, FEATURES_WITHOUT_AGE]
        )

    acc = accuracy(titanic_test["predict"], titanic_test["gender"])
    predicted = titanic_test[["PassengerId", "predict"]]
    predicted.to_csv('predicted.csv', index=False)
    print(acc)


if __name__ == "__main__":
    main()
原创粉丝点击