Python_pandas_20171102

来源:互联网 发布:mysql数据库的安装 编辑:程序博客网 时间:2024/05/29 02:34

所有关于numpy和pandas的代码和资料均来自于网易云视频:
用 numpy 和 pandas 把玩你的数据
1.Pandas 基本介绍

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 15:23:42
# @Author  : Leboryi@gmail.com
# @What    : Pandas basic introduction
"""Tour of the basic pandas containers: Series, DatetimeIndex and DataFrame.

The commented-out ``print`` calls are exploration hints; uncomment any of
them to inspect the corresponding object.
"""
import numpy as np
import pandas as pd

# A Series is a one-dimensional labelled array; np.nan marks a missing value.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
# print(s)

# Six consecutive daily timestamps, used below as row labels.
dates = pd.date_range('20170101', periods=6)
# print(dates)

# `index` defines the row labels, `columns` defines the column labels.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['a', 'b', 'c', 'd'],
)
# print(df)

# Without explicit labels pandas falls back to 0..n-1 for rows and columns.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
# print(df1)

# A DataFrame can also be built from a dict: one entry per column.
# Scalar values ('A', 'B', 'F') are broadcast to the length of the others.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
# print(df2)
# print(df2.dtypes)
# print(df2.index)
# print(df2.columns)
# print(df2.values)
# print(df2.describe())
# print(df2.T)
# print(df2.sort_index(axis=1, ascending=False))
# print(df2.sort_index(axis=0, ascending=False))

# Sorting by the values of a column.
print(df2.sort_values(by='E'))

2.Pandas concat 合并

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 21:21:24
# @Author  : Lebooryi@gmail.com
# @Version : Pandas concat merging
"""Combining DataFrames with ``pd.concat``.

The commented-out sections are alternative experiments (plain
concatenation, inner/outer joins, aligning on one frame's index).
The live code at the bottom appends a Series to a DataFrame as a new row.
"""
import pandas as pd
import numpy as np

# --- concatenating ---------------------------------------------------------
# df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])
# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])
# df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])
# print(df1)
# print(df2)
# print(df3)
# res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# print(res)

# --- join, ['inner', 'outer'] ----------------------------------------------
# df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3])
# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])
# print(df1)
# print(df2)
# res = pd.concat([df1, df2])
# print(res)
# res = pd.concat([df1, df2], axis=1, join='outer')
# res = pd.concat([df1, df2], axis=1, join='inner')
# print(res)

# --- aligning on one frame's index -----------------------------------------
# (the old `join_axes=[df2.index]` argument no longer exists; reindex instead)
# res = pd.concat([df1, df2], axis=1).reindex(df2.index)
# print(res)

df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
print(df1)
print(df2)

# --- appending -------------------------------------------------------------
# (DataFrame.append is gone from current pandas; pd.concat replaces it)
# res = pd.concat([df1, df2], ignore_index=True)
# print(res)
# res = pd.concat([df1, df2, df3])
# print(res)

# Append a Series as one extra row: turn it into a one-row frame whose
# columns are the Series labels, then concatenate and renumber the rows.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)

3.Pandas设置值

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 16:14:44
# @Author  : Lebooryi@gmail.com
# @Version : Pandas setting values
"""Different ways of assigning values into an existing DataFrame."""
import numpy as np
import pandas as pd

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# Set a single cell by integer position (row 2, column 2).
df.iloc[2, 2] = 1111

# Set a single cell by label (row '20130101', column 'B').
df.loc['20130101', 'B'] = 2222

# Set column B to 0 on every row where A > 4.  One `.loc` call with a row
# mask and a column label is the recommended way to change values: the
# chained form `df.B[mask] = 0` may write into a temporary copy instead of
# `df` itself.
df.loc[df.A > 4, 'B'] = 0

# Add a new column filled with a scalar.
df['F'] = np.nan

# Add a new column from a Series; values are aligned on the index labels.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130101', periods=6))
print(df)

4.Pandas 处理丢失数据

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 21:00:15
# @Author  : Lebooryi@gmail.com
# @Version : Pandas handling missing data
"""Detecting, dropping and filling missing values (NaN) in a DataFrame."""
import numpy as np
import pandas as pd

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

# NaN is a float and cannot be stored in an integer column.  Cast the two
# columns that will receive NaN to float up front, rather than relying on
# the assignment to silently change the column dtype (newer pandas versions
# warn about that implicit upcast).  Columns A and D stay integer.
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)

# Drop every row that contains at least one NaN.
print(df.dropna(axis=0, how='any'))

# Drop only the columns that are entirely NaN.  how = {'any', 'all'}
print(df.dropna(axis=1, how='all'))

# Replace every NaN with 0.
print(df.fillna(value=0))

# Boolean mask: True where a value is missing.
print(df.isnull())

# Check whether there is any missing value at all.
print(df.isnull().values.any())

5.Pandas 导入导出数据

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 21:10:13
# @Author  : Lebooryi@gmail.com
# @Version : Pandas importing and exporting data
"""Read a CSV file into a DataFrame and save it again as a pickle.

Requires a file named ``student.csv`` in the current working directory.
The resulting ``student.pickle`` can be loaded back with ``pd.read_pickle``.
"""
import pandas as pd

# Read from a CSV file.
data = pd.read_csv('student.csv')
print(data)

# Save to a pickle file.
data.to_pickle('student.pickle')

6.Pandas concat 合并

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2017-11-02 21:21:24
# @Author  : Lebooryi@gmail.com
# @Version : Pandas concat merging
"""Combining DataFrames with ``pd.concat``.

The commented-out sections are alternative experiments (plain
concatenation, inner/outer joins, aligning on one frame's index).
The live code at the bottom appends a Series to a DataFrame as a new row.
"""
import pandas as pd
import numpy as np

# --- concatenating ---------------------------------------------------------
# df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])
# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])
# df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])
# print(df1)
# print(df2)
# print(df3)
# res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# print(res)

# --- join, ['inner', 'outer'] ----------------------------------------------
# df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3])
# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])
# print(df1)
# print(df2)
# res = pd.concat([df1, df2])
# print(res)
# res = pd.concat([df1, df2], axis=1, join='outer')
# res = pd.concat([df1, df2], axis=1, join='inner')
# print(res)

# --- aligning on one frame's index -----------------------------------------
# (the old `join_axes=[df2.index]` argument no longer exists; reindex instead)
# res = pd.concat([df1, df2], axis=1).reindex(df2.index)
# print(res)

df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
print(df1)
print(df2)

# --- appending -------------------------------------------------------------
# (DataFrame.append is gone from current pandas; pd.concat replaces it)
# res = pd.concat([df1, df2], ignore_index=True)
# print(res)
# res = pd.concat([df1, df2, df3])
# print(res)

# Append a Series as one extra row: turn it into a one-row frame whose
# columns are the Series labels, then concatenate and renumber the rows.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)

7.所用到的student.csv的下载链接:student.csv