python-pandas的基本用法02

来源:互联网 发布:北方民族大学网络 编辑:程序博客网 时间:2024/06/07 10:24

pandas的基本用法02-DataFrame基础

    # coding: utf-8
# Walkthrough of pandas DataFrame basics: building a frame from a dict,
# choosing column order and index labels, selecting columns and rows,
# assigning columns, transposing, and inspecting axis names.
#
# The "#" tables after each print() are sample output; the exact formatting
# (alignment, dtype repr) varies between pandas versions.
#
# Every print() call takes a single argument, so the script produces the same
# output on Python 2 and Python 3.
import numpy as np
from pandas import Series, DataFrame

print('用字典生成DataFrame,key为列的名字。')
data = {
    'city': ['Beijing', 'Shanghai', 'Shenzheng', 'Nanjing', 'Hangzhou'],
    'gdp': [8000, 9000, 3000, 4000, 4500],
    'pop': [2500, 3500, 500, 1500, 1000],
}
print(DataFrame(data))
#         city   gdp   pop
# 0    Beijing  8000  2500
# 1   Shanghai  9000  3500
# 2  Shenzheng  3000   500
# 3    Nanjing  4000  1500
# 4   Hangzhou  4500  1000

print('指定列顺序:')
print(DataFrame(data, columns=['city', 'pop', 'gdp']))
#         city   pop   gdp
# 0    Beijing  2500  8000
# 1   Shanghai  3500  9000
# 2  Shenzheng   500  3000
# 3    Nanjing  1500  4000
# 4   Hangzhou  1000  4500

print('指定索引,在列中指定不存在的列,默认数据用NaN')
data2 = DataFrame(
    data,
    columns=['city', 'pop', 'gdp', 'env'],
    index=['one', 'two', 'three', 'four', 'five'],
)
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000  NaN
# two     Shanghai  3500  9000  NaN
# three  Shenzheng   500  3000  NaN
# four     Nanjing  1500  4000  NaN
# five    Hangzhou  1000  4500  NaN

# Attribute access and bracket access return the same column, so both
# print() calls below show the same Series.
print(data2.city)
print(data2['city'])
# one        Beijing
# two       Shanghai
# three    Shenzheng
# four       Nanjing
# five      Hangzhou
# Name: city, dtype: object

# Select a row by its index label. `.loc` replaces the `.ix` indexer, which
# was removed in pandas 1.0.
print(data2.loc['three'])
# city    Shenzheng
# pop           500
# gdp          3000
# env           NaN
# Name: three, dtype: object

# Assign with brackets rather than `data2.env = ...`: attribute assignment
# only updates a column that already exists and otherwise just sets a plain
# Python attribute on the object.
data2['env'] = np.arange(5)
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    0
# two     Shanghai  3500  9000    1
# three  Shenzheng   500  3000    2
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    4

print('用Series指定要修改的索引及其对应的值,没有指定的默认数据用NaN。')
# Assigning a Series aligns its values to the frame by index label.
val = Series([5, 3, 1, 3, 2], index=['one', 'two', 'three', 'four', 'five'])
data2['env'] = val
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    5
# two     Shanghai  3500  9000    3
# three  Shenzheng   500  3000    1
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    2

print('赋值给新列')
# A boolean comparison on a column yields a boolean Series, stored here as a
# new column.
data2['suit'] = (data2['city'] == 'Shenzheng')
print(data2)
#             city   pop   gdp  env   suit
# one      Beijing  2500  8000    5  False
# two     Shanghai  3500  9000    3  False
# three  Shenzheng   500  3000    1   True
# four     Nanjing  1500  4000    3  False
# five    Hangzhou  1000  4500    2  False

print(data2.columns)
# Index(['city', 'pop', 'gdp', 'env', 'suit'], dtype='object')

print('DataFrame转置')
print(data2.T)
#           one       two      three     four      five
# city  Beijing  Shanghai  Shenzheng  Nanjing  Hangzhou
# pop      2500      3500        500     1500      1000
# gdp      8000      9000       3000     4000      4500
# env         5         3          1        3         2
# suit    False     False       True    False     False

print('指定索引顺序,以及使用切片初始化数据。')
# Replace the row labels in place, then take every row except the last one.
data2.index = [1, 2, 3, 4, 5]
print(data2['city'][:-1])
# 1      Beijing
# 2     Shanghai
# 3    Shenzheng
# 4      Nanjing
# Name: city, dtype: object

print('打印索引和列的名称')
# Neither axis was ever given a name, so both lines print None.
print(data2.index.name)
print(data2.columns.name)