Python-Pandas(4)自定义函数方法

来源:互联网 发布:2017淘宝标题优化技巧 编辑:程序博客网 时间:2024/06/05 16:32
# Specifying axis=1 (or axis='columns') drops every column that contains
# at least one null value.
drop_na_columns = titanic_survival.dropna(axis=1)

# With axis=0 and a subset, rows are dropped only when one of the listed
# columns ("Age" or "Sex") is null; nulls in other columns are ignored.
new_titanic_survival = titanic_survival.dropna(axis=0, subset=["Age", "Sex"])
# To inspect the filtered frame: print(new_titanic_survival)
# .loc selects by index *label* (not by position): the first argument is the
# row label, the second is the column name.
row_index_83_age = titanic_survival.loc[83, "Age"]
# NOTE(review): the variable name says 1000 but the label looked up is 766;
# the name is kept as-is in case other code refers to it -- confirm intent.
row_index_1000_pclass = titanic_survival.loc[766, "Pclass"]
print(row_index_83_age)
print(row_index_1000_pclass)

这里写图片描述

# Sort passengers from oldest to youngest.
new_titanic_survival = titanic_survival.sort_values("Age", ascending=False)
# The first ten rows still carry their original index labels after sorting.
print(new_titanic_survival[0:10])

# Rebuild the index as 0..n-1 in the new order; drop=True discards the old
# index instead of keeping it as an extra column.
titanic_reindexed = new_titanic_survival.reset_index(drop=True)
print(titanic_reindexed.iloc[0:10])

这里写图片描述

def hundredth_row(column):
    """Return the hundredth item (position 99) of a Series.

    Args:
        column: a pandas Series; DataFrame.apply passes each column in turn.

    Returns:
        The value stored at integer position 99.

    Raises:
        IndexError: if the Series holds fewer than 100 items.
    """
    # .iloc is position-based, so 99 is the hundredth element regardless of
    # the index labels.
    return column.iloc[99]


# Apply the function to every column to get the hundredth value of each.
# NOTE: this rebinds the name `hundredth_row` from the function to the
# resulting Series, so the function cannot be called again after this line.
hundredth_row = titanic_survival.apply(hundredth_row)
print(hundredth_row)

这里写图片描述

def not_null_count(column):
    """Return the number of NULL entries in a Series.

    NOTE: despite its name, this function counts the values that ARE null;
    the name is kept unchanged for compatibility with existing callers.

    Args:
        column: a pandas Series; DataFrame.apply passes each column in turn.

    Returns:
        int: how many elements of ``column`` are null.
    """
    is_missing = pd.isnull(column)
    # Boolean-mask selection keeps only the null entries.
    missing_values = column[is_missing]
    return len(missing_values)


# One null count per column of the DataFrame.
column_null_count = titanic_survival.apply(not_null_count)
print(column_null_count)

这里写图片描述

def which_class(row):
    """Translate a passenger row's "Pclass" value into a readable label.

    Args:
        row: a row of the DataFrame (as passed by apply with axis=1); must
            provide a "Pclass" entry.

    Returns:
        "Unknown" when Pclass is null, "First Class" / "Second Class" /
        "Third Class" for 1 / 2 / 3, and None for any other value.
    """
    pclass = row["Pclass"]
    if pd.isnull(pclass):
        return "Unknown"
    elif pclass == 1:
        return "First Class"
    elif pclass == 2:
        return "Second Class"
    elif pclass == 3:
        return "Third Class"
    # Any other value falls through, exactly as in the original chain.
    return None


# By passing axis=1, DataFrame.apply() iterates over rows instead of columns.
classes = titanic_survival.apply(which_class, axis=1)
print(classes)

这里写图片描述

def is_minor(row):
    """Return True when the row's "Age" is below 18, otherwise False.

    A missing (NaN) age compares as not-less-than 18, so it yields False.

    Args:
        row: a row of the DataFrame (as passed by apply with axis=1); must
            provide an "Age" entry.
    """
    # bool() keeps the return a plain Python bool, as the original
    # if/else returning True/False did.
    return bool(row["Age"] < 18)


minors = titanic_survival.apply(is_minor, axis=1)
# To inspect the result: print(minors)


def generate_age_label(row):
    """Classify a passenger row by age.

    Args:
        row: a row of the DataFrame (as passed by apply with axis=1); must
            provide an "Age" entry.

    Returns:
        "unknown" when Age is null, "minor" when Age < 18, else "adult".
    """
    age = row["Age"]
    # Null must be tested first: a NaN age would otherwise fail the < 18
    # comparison and be labelled "adult".
    if pd.isnull(age):
        return "unknown"
    if age < 18:
        return "minor"
    return "adult"


age_labels = titanic_survival.apply(generate_age_label, axis=1)
print(age_labels)

这里写图片描述

# Attach the per-row age labels as a new column so they can be grouped on.
titanic_survival['age_labels'] = age_labels

# Group rows by age label and aggregate the "Survived" column. No aggfunc is
# passed, so pivot_table falls back to its default aggregation (the mean).
age_group_survival = titanic_survival.pivot_table(
    index="age_labels", values="Survived"
)
print(age_group_survival)

这里写图片描述

原创粉丝点击