pandas练习

来源:互联网 发布:各国域名缩写 编辑:程序博客网 时间:2024/05/16 04:32
# pandas practice: small, runnable examples of the Series and DataFrame basics.
# More functions: http://pandas.pydata.org/pandas-docs/stable/10min.html
#
# Each example is a function that builds its own data and returns the result,
# so the examples can be run (or tested) independently.  Running this file as
# a script prints every example in order.
import numpy as np
import pandas as pd
from pandas import DataFrame, Series  # the two core data structures


# ----------------------------------------------------
# Series
# ----------------------------------------------------

def series_from_list():
    """Build a Series from a plain list (a set of values plus a set of labels)."""
    return Series([2, 3, 5, 7, 3, 1])


def series_with_index():
    """Build a Series with an explicit index and look values up by label.

    Returns a dict with the Series itself, a single-label lookup, a
    multi-label lookup, a membership test on the index, and the ``name``
    attributes of the Series and of its index.
    """
    obj = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
    obj.name = "wolawola"  # set the name attribute of the Series
    return {
        "series": obj,
        "single": obj["a"],             # select by one label
        "multiple": obj[["a", "d"]],    # select by several labels
        "has_a": "a" in obj,            # membership tests the index; bool result
        "name": obj.name,
        "index_name": obj.index.name,   # never set in this example
    }


def series_from_dict():
    """Build a Series from a dict and run a few basic operations on it.

    Returns a dict with the Series, the Series multiplied by 2, a membership
    test on the index, and the element-wise null check.
    """
    data = {"a": 1000, "b": 2000, "c": 3000, "d": 4000}
    s = Series(data)
    return {
        "series": s,
        "doubled": s * 2,        # arithmetic is applied element-wise
        "has_a": "a" in s,       # bool result
        # The original printed `s.isnull` (the bound method) instead of
        # calling it; the call is what yields the boolean mask.
        "isnull": s.isnull(),
    }


def series_from_values_and_labels():
    """Build a Series from separately created values and index labels.

    The original paired 5 values with 6 labels, which raises ValueError, and
    named the label list `id`, shadowing the builtin.  Both are fixed here.
    """
    values = [1, 2, 3, 4, 5]
    labels = ["a", "b", "c", "d", "e"]
    return Series(values, index=labels)


def index_is_immutable():
    """Return True if assigning into a Series index raises TypeError."""
    obj = Series(range(3), index=["a", "b", "c"])
    try:
        obj.index[1] = "d"
    except TypeError:
        return True
    return False


def reindex_series():
    """Reindex a Series; labels missing from the original get a missing value."""
    obj = Series([23, 4.5, -8, 100], index=["b", "c", "a", "d"])
    return obj.reindex(["a", "b", "c", "d", "e"])


def reindex_forward_fill():
    """Reindex onto 0..8 with forward fill.

    Filling starts from index 0; the value changes to 4.5 once index 3 is
    reached.  ``range(9)`` is the span of the new index.  ``ffill``/``pad``
    fill forward, ``bfill``/``backfill`` fill backward.
    """
    obj = Series([23, 4.5, -8], index=[0, 3, 5])
    return obj.reindex(range(9), method="ffill")


def series_rank():
    """Rank the values of a Series; returns (ascending, descending) ranks."""
    s = Series([3, 6, 1, 5, -1])
    return s.rank(), s.rank(ascending=False)


def describe_strings():
    """Return ``describe()`` for a Series of strings."""
    obj = Series(["a", "b", "c", "d"])
    return obj.describe()


# ----------------------------------------------------
# DataFrame
# ----------------------------------------------------

def _grid_frame():
    """Return the 3x3 frame (rows a-c, columns no.1-no.3) several examples share."""
    return DataFrame(
        np.arange(9).reshape([3, 3]),
        index=["a", "b", "c"],
        columns=["no.1", "no.2", "no.3"],
    )


def frame_from_dict():
    """Build a DataFrame from a dict of equal-length lists.

    Returns a dict with the plain frame, the frame built with an extra
    ``age`` column that the data does not contain, two ways of selecting a
    column, the column labels, and the frame after ``age`` has been assigned.
    """
    data = {
        "state": ["a", "b", "c", "d"],
        "year": [1991, 1992, 1993, 1994],
        "pop": [6, 7, 8, 9],
    }
    plain = DataFrame(data)
    # 'age' is not in the data, so that column is filled with NaN.
    frame = DataFrame(data, columns=["year", "state", "pop", "age"])
    missing_age = frame.copy()
    frame["age"] = np.arange(4)  # give the age column real values
    return {
        "plain": plain,
        "missing_age": missing_age,
        "years": frame["year"],      # column by key
        "states": frame.state,       # column by attribute
        "columns": frame.columns,    # column labels
        "with_age": frame,
    }


def frame_from_nested_dict():
    """Build a DataFrame from a nested dict; returns (frame, transposed frame).

    Outer keys become the columns, inner keys become the row index.
    """
    pop = {
        "a": {1: 1000, 2: 2000},
        "b": {1: 5000, 3: 3000, 4: 4000},
    }
    frame = DataFrame(pop)
    return frame, frame.T


def reindex_frame_rows():
    """Reindex the rows of a frame; the new row 'd' is filled with NaN."""
    return _grid_frame().reindex(["a", "b", "c", "d"])


def drop_entries():
    """Drop entries along an axis; returns (without row 'a', without column 'no.1')."""
    frame = _grid_frame()
    return frame.drop("a"), frame.drop("no.1", axis=1)


def frame_comparison():
    """Compare a frame with a scalar; the result is a boolean frame."""
    return _grid_frame() > 3


def cell_lookup():
    """Look up one cell by (row label, column label).

    The original used ``frame.ix``, which was removed in pandas 1.0; ``.loc``
    is the label-based replacement.
    """
    return _grid_frame().loc["a", "no.1"]


def frame_alignment():
    """Add two differently shaped frames; returns (sum, reindexed first frame).

    Addition aligns on both row and column labels, so cells present in only
    one operand become NaN.  The second result reindexes the first frame to
    the second frame's columns, filling the new column with 0 instead.
    """
    frame = DataFrame(np.arange(9).reshape([3, 3]), index=["a", "b", "c"])
    frame2 = DataFrame(np.arange(16).reshape([4, 4]), index=["a", "b", "c", "d"])
    total = frame + frame2
    refilled = frame.reindex(columns=frame2.columns, fill_value=0)
    return total, refilled


def sort_by_index():
    """Sort a frame by its labels; returns (original, by rows, by columns)."""
    frame = DataFrame(
        np.arange(8).reshape([2, 4]),
        index=["three", "one"],
        columns=["a", "d", "c", "b"],
    )
    return frame, frame.sort_index(), frame.sort_index(axis=1)


def frame_rank():
    """Rank the values of a frame across each row (``axis=1``)."""
    frame = DataFrame({"b": [2.5, 1.6, 9.0], "a": [4, 7, 5.7], "c": [2, 9, 7]})
    return frame.rank(axis=1)


def row_broadcast():
    """Subtract the first row of an array from every row.

    Arithmetic between a DataFrame and a Series works the same way: the
    Series index is matched against the frame's columns and the operation is
    broadcast down the rows.
    """
    arr = np.arange(12).reshape([3, 4])
    return arr - arr[0]


def slice_rows_and_columns():
    """Slice by position: rows [1:3) and columns [2:4)."""
    arr = np.arange(12).reshape([3, 4])
    df = DataFrame(
        arr,
        index=["a", "b", "c"],
        columns=["no.1", "no.2", "no.3", "no.4"],
    )
    return df.iloc[1:3, 2:4]


# ----------------------------------------------------
# Script entry point
# ----------------------------------------------------

def _show(result):
    """Print one example result, one labelled part at a time."""
    if isinstance(result, dict):
        for label, value in result.items():
            print("[{}]".format(label))
            print(value)
    elif isinstance(result, tuple):
        for value in result:
            print(value)
    else:
        print(result)


def main():
    """Run every example in order and print its result."""
    examples = (
        series_from_list,
        series_with_index,
        series_from_dict,
        series_from_values_and_labels,
        index_is_immutable,
        reindex_series,
        reindex_forward_fill,
        series_rank,
        describe_strings,
        frame_from_dict,
        frame_from_nested_dict,
        reindex_frame_rows,
        drop_entries,
        frame_comparison,
        cell_lookup,
        frame_alignment,
        sort_by_index,
        frame_rank,
        row_broadcast,
        slice_rows_and_columns,
    )
    for example in examples:
        print("---", example.__name__, "---")
        _show(example())


if __name__ == "__main__":
    main()