python-pandas的基本用法11

来源:互联网 发布:李氏筋骨散淘宝 编辑:程序博客网 时间:2024/06/08 19:55

pandas的基本用法11-层次化索引

# -*- coding: utf-8 -*-
# Walkthrough of hierarchical (multi-level) indexing in pandas: building a
# MultiIndex on a Series and a DataFrame, selecting by level, swapping and
# sorting levels, aggregating per level, and moving columns in and out of
# the index.
#
# The "expected output" comments below come from the original article and an
# older pandas release; exact formatting (dtype footers, float rendering)
# may differ on current versions.
#
# Every print() takes a single argument, so the script is valid on both
# Python 2 and Python 3.
import numpy as np
from pandas import Series, DataFrame, MultiIndex

print('Series的层次索引')
data = Series([1, 3, 56, 2, 88, 32, 43, 12, 65, 90],
              index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                     [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
print(data)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90
print(data.index)
# [a  1,    2,    3, b  1,    2,    3, c  1,    2, d  2,    3]

# Positional slice: the first two rows regardless of index labels.
print(data.iloc[:2])
# a  1    1
#    2    3

# unstack() pivots the inner index level into columns; label pairs that
# are missing from the Series show up as NaN.
print(data.unstack())
#     1   2   3
# a   1   3  56
# b   2  88  32
# c  43  12 NaN
# d NaN  65  90

# stack() is the inverse operation.
# NOTE(review): the article shows the NaN cells dropped on the round trip;
# newer pandas stack implementations may keep them - confirm on the
# version in use.
print(data.unstack().stack())
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

print('DataFrame的层次索引')
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
print(frame)
#       Ohio       Colorado
#      Green  Red     Green
# a 1      0    1         2
#   2      3    4         5
# b 1      6    7         8
#   2      9   10        11
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

# .ix was removed from pandas; .loc with an explicit row tuple selects the
# same row without the label/position ambiguity .ix had.
print(frame.loc[('a', 1), :])
# state     color
# Ohio      Green    0
#           Red      1
# Colorado  Green    2
print(frame.loc[('a', 2), 'Colorado'])
# color
# Green    5
print(frame.loc[('a', 2), ('Ohio', 'Red')])
# 4

print('直接用MultiIndex创建层次索引结构')
# Same level values as the columns of the frame above ('Gree' in the
# original listing was a typo for 'Green').
state_color_index = MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'])
print(state_color_index)

print('索引层级交换')
frame_swapped = frame.swaplevel('key1', 'key2')
print(frame_swapped)
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
# 2    a         3    4         5
# 1    b         6    7         8
# 2    b         9   10        11

# Equivalent spelling: levels can also be named by position.
print(frame_swapped.swaplevel(0, 1))
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
#      2         3    4         5
# b    1         6    7         8
#      2         9   10        11

print('根据索引排序')
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
print(frame.sort_index(level='key2'))
# state       Ohio       Colorado
# color      Green  Red     Green
# key1 key2
# a    1         0    1         2
# b    1         6    7         8
# a    2         3    4         5
# b    2         9   10        11
print(frame.swaplevel(0, 1).sort_index(level=0))
# state       Ohio       Colorado
# color      Green  Red     Green
# key2 key1
# 1    a         0    1         2
#      b         6    7         8
# 2    a         3    4         5
#      b         9   10        11

print('根据指定的key计算统计信息')
# sum(level=...) was removed from pandas; grouping by the index level and
# summing yields the same per-level totals.
key2_totals = frame.groupby(level='key2').sum()
print(key2_totals)
# state   Ohio       Colorado
# color  Green  Red     Green
# key2
# 1          6    8        10
# 2         12   14        16

print('使用列生成层次索引')
frame = DataFrame({'a': list(range(7)),
                   'b': list(range(7, 0, -1)),
                   'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
                   'd': [0, 1, 2, 0, 1, 2, 3]})
print(frame)
#  a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# Turn columns c and d into the (two-level) index.
print(frame.set_index(['c', 'd']))
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# drop=False keeps c and d as ordinary columns as well.
print(frame.set_index(['c', 'd'], drop=False))
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

frame2 = frame.set_index(['c', 'd'])
# reset_index() moves the index levels back into columns.
print(frame2.reset_index())
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1
原创粉丝点击