pandas基础
来源:互联网 发布:手机淘宝下载安装2017免费下载 编辑:程序博客网 时间:2024/06/04 19:01
pandas基础
pandas的基本用法01-Series
# -*- coding: utf-8 -*-
"""Series basics: construction, selection, dict input and naming.

The snippet had been collapsed onto one line behind a leading ``#`` so
nothing ran; it is restored here as a runnable script.  Every ``print``
call takes a single argument.
"""
from pandas import Series

obj = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
print(obj)
# a    4
# b    7
# c   -5
# d    3
# dtype: int64

# Select several entries at once by passing a list of labels.
print(obj[['a', 'b', 'c']])
# a    4
# b    7
# c   -5
# dtype: int64

# Boolean mask: keep only the positive values.
print(obj[obj > 0])
# a    4
# b    7
# d    3
# dtype: int64

# `in` is evaluated against the index labels.
print('b' in obj)
# True

print('使用字典生成Series')
dic = {'Paul': 95, 'James': 98, 'Harden': 94}
obj = Series(dic)
print(obj)
# Recorded output (NOTE: newer pandas versions may keep the dict's
# insertion order instead of sorting the keys):
# Harden    94
# James     98
# Paul      95
# dtype: int64

name = ['Wade', 'James', 'Harden']
# With an explicit index, labels missing from the dict ('Wade') become NaN
# (hence float64) and dict keys that are not requested ('Paul') are left out.
obj = Series(dic, index=name)
print(obj)
# Wade      NaN
# James      98
# Harden     94
# dtype: float64

print('指定Series及其索引的名字')
obj.name = 'name'
obj.index.name = 'score'
print(obj)
# score
# Wade      NaN
# James      98
# Harden     94
# Name: name, dtype: float64

print('替换index')
# Assigning a new list of labels replaces the index in place; the recorded
# output no longer shows the index name afterwards.
obj.index = ['韦德', '詹姆斯', '哈登']
print(obj)
# 韦德     NaN
# 詹姆斯     98
# 哈登      94
# Name: name, dtype: float64
pandas的基本用法02-DataFrame基础
# -*- coding: utf-8 -*-
"""DataFrame basics: construction from a dict, column/row access, assignment.

Restored from a single collapsed line.  ``.ix`` (no longer available in
current pandas) is replaced by ``.loc``/``.iloc``.
"""
import numpy as np
from pandas import Series, DataFrame

print('用字典生成DataFrame,key为列的名字。')
data = {
    'city': ['Beijing', 'Shanghai', 'Shenzheng', 'Nanjing', 'Hangzhou'],
    'gdp': [8000, 9000, 3000, 4000, 4500],
    'pop': [2500, 3500, 500, 1500, 1000],
}
print(DataFrame(data))
#         city   gdp   pop
# 0    Beijing  8000  2500
# 1   Shanghai  9000  3500
# 2  Shenzheng  3000   500
# 3    Nanjing  4000  1500
# 4   Hangzhou  4500  1000

print('指定列顺序:')
print(DataFrame(data, columns=['city', 'pop', 'gdp']))
#         city   pop   gdp
# 0    Beijing  2500  8000
# 1   Shanghai  3500  9000
# 2  Shenzheng   500  3000
# 3    Nanjing  1500  4000
# 4   Hangzhou  1000  4500

print('指定索引,在列中指定不存在的列,默认数据用NaN')
# 'env' is not a key of `data`, so that column is created filled with NaN.
data2 = DataFrame(
    data,
    columns=['city', 'pop', 'gdp', 'env'],
    index=['one', 'two', 'three', 'four', 'five'],
)
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000  NaN
# two     Shanghai  3500  9000  NaN
# three  Shenzheng   500  3000  NaN
# four     Nanjing  1500  4000  NaN
# five    Hangzhou  1000  4500  NaN

# A column can be read as an attribute or with brackets.
print(data2.city)
print(data2['city'])
# one        Beijing
# two       Shanghai
# three    Shenzheng
# four       Nanjing
# five      Hangzhou
# Name: city, dtype: object

# Row lookup by label (was `.ix['three']`).
print(data2.loc['three'])
# city    Shenzheng
# pop           500
# gdp          3000
# env           NaN
# Name: three, dtype: object

# Bracket assignment is used instead of `data2.env = ...`: it is the
# unambiguous way to write a column.
data2['env'] = np.arange(5)
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    0
# two     Shanghai  3500  9000    1
# three  Shenzheng   500  3000    2
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    4

print('用Series指定要修改的索引及其对应的值,没有指定的默认数据用NaN。')
# Assigning a Series aligns it on the index labels.
val = Series([5, 3, 1, 3, 2], index=['one', 'two', 'three', 'four', 'five'])
data2['env'] = val
print(data2)
#             city   pop   gdp  env
# one      Beijing  2500  8000    5
# two     Shanghai  3500  9000    3
# three  Shenzheng   500  3000    1
# four     Nanjing  1500  4000    3
# five    Hangzhou  1000  4500    2

print('赋值给新列')
data2['suit'] = (data2.city == 'Shenzheng')
print(data2)
#             city   pop   gdp  env   suit
# one      Beijing  2500  8000    5  False
# two     Shanghai  3500  9000    3  False
# three  Shenzheng   500  3000    1   True
# four     Nanjing  1500  4000    3  False
# five    Hangzhou  1000  4500    2  False

print(data2.columns)
# Index([city, pop, gdp, env, suit], dtype=object)

print('DataFrame转置')
print(data2.T)
#           one       two      three     four      five
# city  Beijing  Shanghai  Shenzheng  Nanjing  Hangzhou
# pop      2500      3500        500     1500      1000
# gdp      8000      9000       3000     4000      4500
# env         5         3          1        3         2
# suit    False     False       True    False     False

print('指定索引顺序,以及使用切片初始化数据。')
data2.index = [1, 2, 3, 4, 5]
# `.iloc` makes the "all but the last row" slice explicitly positional now
# that the index holds integers.
print(data2['city'].iloc[:-1])
# 1      Beijing
# 2     Shanghai
# 3    Shenzheng
# 4      Nanjing
# Name: city, dtype: object

print('打印索引和列的名称')
# Neither axis was given a name in this script.
print(data2.index.name)
print(data2.columns.name)
pandas的基本用法03-reindex()
# -*- coding: utf-8 -*-
"""reindex(): conform a Series/DataFrame to a new set of labels.

Restored from a single collapsed line.  The unused
``from matplotlib.pyplot import axis`` import was dropped, and
``fillna(method='ffill')`` is written as ``ffill()``.
"""
import numpy as np
from pandas import DataFrame, Series

s = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])

# New labels get the given constant.
s2 = s.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
print(s2)
# a    1
# b    2
# c    3
# d    4
# e    0
# dtype: int64

# New labels get the value carried forward from the previous label.
s2 = s.reindex(['a', 'b', 'c', 'd', 'e'], method='ffill')
print(s2)
# a    1
# b    2
# c    3
# d    4
# e    4
# dtype: int64

print('对DataFrame重新指定索引')
f = DataFrame(np.arange(9).reshape(3, 3),
              index=['i1', 'i2', 'i3'],
              columns=['c1', 'c2', 'c3'])
print(f)
#     c1  c2  c3
# i1   0   1   2
# i2   3   4   5
# i3   6   7   8

# None of the requested row labels exist, so every row is NaN.
f2 = f.reindex(['a', 'b', 'c', 'd'])
print(f2)
#    c1  c2  c3
# a NaN NaN NaN
# b NaN NaN NaN
# c NaN NaN NaN
# d NaN NaN NaN

print('重新指定column')
names = ['c1', 'c2', 'Tony']
print(f.reindex(columns=names))
#     c1  c2  Tony
# i1   0   1   NaN
# i2   3   4   NaN
# i3   6   7   NaN

print('对DataFrame重新指定索引并指定填元素充方法')
print(f)
#     c1  c2  c3
# i1   0   1   2
# i2   3   4   5
# i3   6   7   8

# Forward-fill the new row 'x' from 'i3', then pick the columns.  Done in
# two steps so the fill visibly applies to rows only; the recorded output
# below is unchanged.
f2 = f.reindex(index=['i1', 'i2', 'x'], method='ffill').reindex(columns=names)
print(f2)
#     c1  c2  Tony
# i1   0   1   NaN
# i2   3   4   NaN
# x    6   7   NaN

# Fill the remaining gaps left-to-right within each row.
filled = f2.ffill(axis=1)
print(filled)
#     c1  c2  Tony
# i1   0   1     1
# i2   3   4     4
# x    6   7     7
pandas的基本用法04-drop()
# -*- coding: utf-8 -*-
"""drop(): remove entries by label from a Series or DataFrame.

Restored from a single collapsed line.  Two unused imports
(``numpy.core.defchararray.index`` and ``matplotlib.pyplot.axis``) were
dropped; neither name was referenced by the script.
"""
import numpy as np
from pandas import Series, DataFrame

s = Series(np.arange(5.), index=[1, 2, 3, 4, 5])
# drop() returns a new object; `s` itself is left untouched.
newS = s.drop([1, 3])
print(newS)
# 2    1
# 4    3
# 5    4
# dtype: float64

print('DataFrame删除元素,可指定索引或列。')
df = DataFrame(np.arange(16).reshape(4, 4),
               index=['a', 'b', 'c', 'd'],
               columns=[1, 2, 3, 4])
print(df)
#     1   2   3   4
# a   0   1   2   3
# b   4   5   6   7
# c   8   9  10  11
# d  12  13  14  15

# The list names the individual row labels to remove (it is not a range).
print(df.drop(['a', 'b']))
#     1   2   3   4
# c   8   9  10  11
# d  12  13  14  15

# axis=1 removes columns instead of rows.
print(df.drop([1, 2], axis=1))
#     3   4
# a   2   3
# b   6   7
# c  10  11
# d  14  15
pandas的基本用法05-索引
# -*- coding: utf-8 -*-
"""Indexing and selection on Series and DataFrame.

Restored from a single collapsed line.  ``.ix`` is replaced by the
explicit ``.loc`` (labels) / ``.iloc`` (positions) accessors, and the
unused ``numpy.core.defchararray.index`` import was dropped.
"""
import numpy as np
from pandas import Series, DataFrame

print('Series的索引,默认数字索引可以工作。')
s = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(s[['a', 'b']])
# a    0
# b    1
# dtype: float64

# Positional selection on a label-indexed Series (was `s[[0, 1, 2]]`).
print(s.iloc[[0, 1, 2]])
# a    0
# b    1
# c    2
# dtype: float64

print(s[s > 1])
# c    2
# d    3
# dtype: float64

print('Series的数组切片')
# Label slices include the end label.
print(s['a':'c'])
# a    0
# b    1
# c    2
# dtype: float64

print('DataFrame的索引')
df = DataFrame(np.arange(16).reshape(4, 4),
               index=['a', 'b', 'c', 'd'],
               columns=[1, 2, 3, 4])
print(df)
#     1   2   3   4
# a   0   1   2   3
# b   4   5   6   7
# c   8   9  10  11
# d  12  13  14  15

print('打印列')
print(df[[1, 2]])
#     1   2
# a   0   1
# b   4   5
# c   8   9
# d  12  13

print('打印行')
# A plain slice on a DataFrame selects rows.
print(df[:2])
#    1  2  3  4
# a  0  1  2  3
# b  4  5  6  7

# Same rows, selected explicitly by position (was `df.ix[:2]`).
print(df.iloc[:2])
#    1  2  3  4
# a  0  1  2  3
# b  4  5  6  7

print('指定索引和列')
print(df.loc[['a', 'b'], [1, 2, 3]])
#    1  2  3
# a  0  1  2
# b  4  5  6

# Both slices are label-based and inclusive: rows up to 'c', columns up to 3.
print(df.loc[:'c', :3])
#    1  2   3
# a  0  1   2
# b  4  5   6
# c  8  9  10

print('根据条件选择')
print(df > 3)
#        1      2      3      4
# a  False  False  False  False
# b   True   True   True   True
# c   True   True   True   True
# d   True   True   True   True

print(df[[1, 2]] > 3)
#        1      2
# a  False  False
# b   True   True
# c   True   True
# d   True   True

# Overwrite every cell smaller than 3 with 0, in place.
df[df < 3] = 0
print(df)
#     1   2   3   4
# a   0   0   0   3
# b   4   5   6   7
# c   8   9  10  11
# d  12  13  14  15

print(df.T)
#    a  b   c   d
# 1  0  4   8  12
# 2  0  5   9  13
# 3  0  6  10  14
# 4  3  7  11  15
pandas的基本用法06-算术和数据对齐
# -*- coding: utf-8 -*-
"""Arithmetic and label alignment between Series and DataFrames.

Restored from a single collapsed line.  The imports of ``dataframeDemo``
and ``matplotlib.pyplot.axis`` were dropped: neither name is used by the
script.  ``.ix[0]`` is replaced by ``.iloc[0]``.
"""
import numpy as np
from pandas import Series, DataFrame

s1 = Series([-1, 3, 4], index=['a', 'b', 'c'])
s2 = Series([0, 9, 2, 5], index=['a', 'b', 'c', 'd'])
# Operands are aligned on their labels; 'd' exists on one side only -> NaN.
print(s1 + s2)
# a    -1
# b    12
# c     6
# d   NaN
# dtype: float64

print('DataFrame加法')
df1 = DataFrame(np.arange(9).reshape(3, 3),
                index=list('abc'), columns=list('123'))
df2 = DataFrame(np.arange(9).reshape(3, 3),
                index=list('abd'), columns=list('023'))
print(df1)
#    1  2  3
# a  0  1  2
# b  3  4  5
# c  6  7  8
print(df2)
#    0  2  3
# a  0  1  2
# b  3  4  5
# d  6  7  8

# Only cells whose row AND column label exist in both frames get a value.
print(df1 + df2)
#     0   1    2    3
# a NaN NaN    2    4
# b NaN NaN    8   10
# c NaN NaN  NaN  NaN
# d NaN NaN  NaN  NaN

print('数据填充')
df1 = DataFrame(np.arange(12.).reshape(3, 4))
df2 = DataFrame(np.arange(20.).reshape(4, 5))
print(df1)
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11
print(df2)
#     0   1   2   3   4
# 0   0   1   2   3   4
# 1   5   6   7   8   9
# 2  10  11  12  13  14
# 3  15  16  17  18  19

print(df1.add(df2))
#     0   1   2   3   4
# 0   0   2   4   6 NaN
# 1   9  11  13  15 NaN
# 2  18  20  22  24 NaN
# 3 NaN NaN NaN NaN NaN

# fill_value stands in for a cell that is missing on one side only.
print(df1.add(df2, fill_value=0))
#     0   1   2   3   4
# 0   0   2   4   6   4
# 1   9  11  13  15   9
# 2  18  20  22  24  14
# 3  15  16  17  18  19

print(df1.reindex(columns=df2.columns, fill_value=0))
#    0  1   2   3  4
# 0  0  1   2   3  0
# 1  4  5   6   7  0
# 2  8  9  10  11  0

print('DataFrame与Series之间的操作')
dataframe = DataFrame(np.arange(12.).reshape(3, 4))
series = dataframe.iloc[0]  # first row
print(dataframe)
#    0  1   2   3
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11
print(series)
# 0    0
# 1    1
# 2    2
# 3    3

# The Series is matched against the columns and subtracted from every row.
print(dataframe - series)
#    0  1  2  3
# 0  0  0  0  0
# 1  4  4  4  4
# 2  8  8  8  8

series = dataframe[0]  # first column
# axis=0 matches the Series against the row index instead, so it is
# subtracted from every column.
print(dataframe.sub(series, axis=0))
#    0  1  2  3
# 0  0  1  2  3
# 1  0  1  2  3
# 2  0  1  2  3
pandas的基本用法07-匿名函数
# -*- coding: utf-8 -*-
"""Applying lambdas with apply(), element-wise mapping and Series.map().

Restored from a single collapsed line.  The unused import of
``pandas.core.format.DataFrameFormatter`` was dropped (the script never
referenced it).  The data is random, so the sample outputs recorded in
the comments differ on every run.
"""
import numpy as np
from pandas import Series, DataFrame

print('lambda以及应用')
frame = DataFrame(np.random.randn(3, 4),
                  index=list('abc'), columns=[1, 2, 3, 4])
print(frame)
# Sample output:
#           1         2         3         4
# a  1.099778 -0.953612  0.776224 -0.751771
# b  0.094904 -0.882894 -0.689881 -0.694949
# c  1.329955  0.137261  2.276389  0.546899

# Spread (max - min) of whatever Series it is handed.
f = lambda x: x.max() - x.min()

# apply() hands whole rows/columns to the function; the element-wise
# DataFrame method and Series.map() call the function once per value.
# axis=1 means the function receives one row at a time.
spread = frame.apply(f, axis=1)
print(spread)
# Sample output:
# a    2.680024
# b    3.077474
# c    1.255980

_format = lambda x: '%.2f' % x

# The element-wise DataFrame method is `applymap` in older pandas and `map`
# in newer releases; use whichever this installation provides.
elementwise = frame.map if hasattr(frame, 'map') else frame.applymap
formatted = elementwise(_format)
print(formatted)
# Sample output:
#        1      2      3      4
# a  -0.23  -1.47  -0.16   0.40
# b   0.21   0.94  -0.22  -3.51
# c  -0.30  -0.35  -0.25   0.76

print(frame[1].map(_format))
# Sample output:
# a    0.63
# b    0.51
# c    0.11
pandas的基本用法08-重复索引和排序排名
# -*- coding: utf-8 -*-
"""Duplicate index labels, sorting and ranking.

Restored from a single collapsed line.  Replacements for calls that are
gone from current pandas: ``Series.order()`` -> ``sort_values()``,
``sort_index(by=...)`` -> ``sort_values(by=...)``, ``.ix`` ->
``.loc``/``.iloc``.
"""
import numpy as np
from pandas import Series, DataFrame

print('重复的索引')
s = Series(range(5), index=['a', 'a', 'b', 'b', 'c'])
print(s)
# a    0
# a    1
# b    2
# b    3
# c    4

# A duplicated label returns every matching entry; take the first of them
# by position.
print(s['a'].iloc[0])
# 0

df = DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])
print(df)
# Sample output (random data):
#           0         1         2
# a  0.818048 -2.071100 -1.278806
# a -1.113891  1.694715  0.766724
# b  0.606746 -0.846371 -1.685604
# b -0.174152 -0.902854 -0.893771

print(df.loc['a'].iloc[0])
# 0    0.818048
# 1   -2.071100
# 2   -1.278806

print('根据索引排序,对于DataFrame可以指定轴。')
s = Series(range(4), index=['d', 'a', 'b', 'c'])
print(s.sort_index())
# a    1
# b    2
# c    3
# d    0

frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'], columns=list('dabc'))
print(frame)
#        d  a  b  c
# three  0  1  2  3
# one    4  5  6  7

print(frame.sort_index())  # sort by row labels
#        d  a  b  c
# one    4  5  6  7
# three  0  1  2  3

print(frame.sort_index(axis=1))  # sort by column labels
#        a  b  c  d
# three  1  2  3  0
# one    5  6  7  4

print(frame.sort_index(axis=1, ascending=False))  # column labels, descending
#        d  c  b  a
# three  0  3  2  1
# one    4  7  6  5

print('根据值排序')
s = Series([4, -1, 10, 9])
print(s.sort_values())
# 1    -1
# 0     4
# 3     9
# 2    10

print('DataFrame指定列排序')
df = DataFrame({'b': [-2, 0, 10, 3], 'a': [0, 1, 1, 0]})
print(df)
# Recorded output (NOTE: newer pandas versions may keep the dict's key
# order, i.e. 'b' before 'a'):
#    a   b
# 0  0  -2
# 1  1   0
# 2  1  10
# 3  0   3

print(df.sort_values(by='b'))
#    a   b
# 0  0  -2
# 1  1   0
# 3  0   3
# 2  1  10

print('rank,求排名的平均位置(从1开始)')
s = Series([7, -5, 7, 4, 4])  # sorted: -5 4 4 7 7
# Default method: tied values share the average of the positions they span.
print(s.rank())
# 0    4.5   (4 + 5) / 2
# 1    1.0
# 2    4.5
# 3    2.5   (2 + 3) / 2
# 4    2.5

# 'first': ties are ranked in order of appearance, no averaging.
print(s.rank(method='first'))
# 0    4
# 1    1
# 2    5
# 3    2
# 4    3

# Descending order, ties take the largest rank of their group; -5 is
# therefore ranked 5.
print(s.rank(ascending=False, method='max'))
# 0    2
# 1    5
# 2    2
# 3    4
# 4    4

print('dataframe 排名')
frame = DataFrame({'b': [4.3, 7, -3, 2],
                   'a': [0, 1, 0, 1],
                   'c': [-2, 5, 8, -2.5]})
print(frame)
# Recorded output (column order may follow the dict's key order instead):
#    a    b    c
# 0  0  4.3 -2.0
# 1  1  7.0  5.0
# 2  0 -3.0  8.0
# 3  1  2.0 -2.5

# axis=0 ranks the values within each column.
print(frame.rank(axis=0, method='first', ascending=False))
#    a  b  c
# 0  3  2  3
# 1  1  1  2
# 2  4  4  1
# 3  2  3  4
pandas的基本用法09-统计函数
# -*- coding: utf-8 -*-
"""Descriptive statistics, unique values, value counts and membership.

Restored from a single collapsed line.  ``pd.value_counts`` is replaced
by ``Series.value_counts``, and the row-wise sum passes ``min_count=1``
so that the all-NaN row yields the NaN shown in the recorded output.
"""
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

print('求和')
df = DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
               index=['a', 'b', 'c', 'd'],
               columns=['one', 'two'])
print(df)
#     one  two
# a  1.40  NaN
# b  7.10 -4.5
# c   NaN  NaN
# d  0.75 -1.3

print(df.sum())  # column totals by default; NaN is skipped
# one    9.25
# two   -5.80

# Row totals.  min_count=1 demands at least one real value per row, so the
# all-NaN row 'c' stays NaN.
print(df.sum(axis=1, min_count=1))
# a    1.40
# b    2.60
# c     NaN
# d   -0.55

# Label of the largest value in each column.
print(df.idxmax())
# one    b
# two    d

print(df.cumsum())
#     one  two
# a  1.40  NaN
# b  8.50 -4.5
# c   NaN  NaN
# d  9.25 -5.8

print(df.describe())
#             one       two
# count  3.000000  2.000000
# mean   3.083333 -2.900000
# std    3.493685  2.262742
# min    0.750000 -4.500000
# 25%    1.075000 -3.700000
# 50%    1.400000 -2.900000
# 75%    4.250000 -2.100000
# max    7.100000 -1.300000

# Disabled correlation/covariance example, kept for reference.
# NOTE(review): `web` is never imported in this script, the start date
# ('4/1/2016') is later than the end date ('7/15/2015'), and
# `returns.volume` looks like it was meant to be `volume` -- confirm before
# enabling.
# print '相关性与协方差'
# all_data = {}
# for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
#     all_data[ticker] = web.get_data_yahoo(ticker, '4/1/2016', '7/15/2015')
# price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.iteritems()})
# volume = DataFrame({tic: data['Volume'] for tic, data in all_data.iteritems()})
# returns = price.pct_change()
# # print returns.tail()
# print returns.MSFT.corr(returns.IBM)
# print returns.corr()  # correlation; a column with itself is always 1
# print returns.cov()   # covariance
# print returns.corrwith(returns.IBM)
# print returns.corrwith(returns.volume)

print('去重')
s = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
print(s.unique())
# ['c' 'a' 'd' 'b']

print(s.value_counts())
# c    3
# a    3
# b    2
# d    1

print('判断元素存在')
mask = s.isin(['a', 'b', 'c'])
print(mask)
# 0     True
# 1     True
# 2    False
# 3     True
# 4     True
# 5     True
# 6     True
# 7     True
# 8     True

print(s[mask])
# 0    c
# 1    a
# 3    a
# 4    a
# 5    b
# 6    b
# 7    c
# 8    c

data = DataFrame({'c1': [1, 3, 4, 3, 4],
                  'c2': [2, 3, 1, 2, 3],
                  'c3': [1, 5, 2, 4, 4]})
print(data)
#    c1  c2  c3
# 0   1   2   1
# 1   3   3   5
# 2   4   1   2
# 3   3   2   4
# 4   4   3   4

# How often each value occurs per column; values absent from a column
# come out as NaN and are turned into 0.
col_counts = data.apply(Series.value_counts).fillna(0)
print(col_counts)
#    c1  c2  c3
# 1   1   1   1
# 2   0   2   1
# 3   2   2   0
# 4   2   0   2
# 5   0   0   1

# The same count, taken per row.
row_counts = data.apply(Series.value_counts, axis=1).fillna(0)
print(row_counts)
#    1  2  3  4  5
# 0  2  1  0  0  0
# 1  0  0  2  0  1
# 2  1  1  0  1  0
# 3  0  1  1  1  0
# 4  0  0  1  2  0
pandas的基本用法10-处理NAN
# -*- coding: utf-8 -*-
"""Handling missing data: isnull/notnull, dropna and fillna.

Restored from a single collapsed line.  ``.ix`` is replaced by ``.loc``.
Parts of the data are random, so those sample outputs differ per run.
"""
import numpy as np
from numpy import nan as NA
from pandas import Series, DataFrame

print('作为null处理的值')
s = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
print(s)
print(s.isnull())
# 0    False
# 1    False
# 2     True
# 3    False

# None is treated as missing as well.
s[0] = None
print(s.isnull())
# 0     True
# 1    False
# 2     True
# 3    False

print(s.dropna())
# 1    artichoke
# 3      avocado

# Equivalent to dropna(): filter with the notnull() mask.
print(s[s.notnull()])
# 1    artichoke
# 3      avocado

print('DataFrame对丢弃NA的处理')
data = DataFrame([[1., 6.5, 3.], [1., NA, NA], [NA, NA, NA], [NA, 6.5, 3.]])
print(data)
#      0    1    2
# 0    1  6.5    3
# 1    1  NaN  NaN
# 2  NaN  NaN  NaN
# 3  NaN  6.5    3

print(data.dropna())  # default: drop a row if it contains any NA
#    0    1  2
# 0  1  6.5  3

print(data.dropna(axis=0, how='all'))  # drop a row only if every value is NA
#      0    1    2
# 0    1  6.5    3
# 1    1  NaN  NaN
# 3  NaN  6.5    3

data = DataFrame(np.random.randn(7, 3))
# `.loc` slices are label-based and inclusive: rows 0..4 and rows 0..2.
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
# Sample output:
#           0         1         2
# 0  0.819602       NaN       NaN
# 1  0.513875       NaN       NaN
# 2  1.232815       NaN       NaN
# 3 -0.272040       NaN -0.202212
# 4 -0.485529       NaN -0.121475
# 5  0.054189  0.025241  1.031688
# 6 -1.729412 -0.975371 -2.013163

# thresh=2 keeps only the rows that hold at least 2 non-NA values, so rows
# 0-2 (one real value each) are dropped.
print(data.dropna(thresh=2))
# Sample output:
#           0         1         2
# 3 -0.743106       NaN -0.460403
# 4 -1.379843       NaN -0.495650
# 5  1.151642 -1.087282 -2.163735
# 6 -0.008196  0.674448 -0.650966

print('填充0')
data.fillna(0, inplace=True)  # modifies `data` itself
print(data)
# Sample output:
#           0         1         2
# 0 -0.392616  0.000000  0.000000
# 1 -1.069262  0.000000  0.000000
# 2 -0.751801  0.000000  0.000000
# 3 -0.899334  0.000000  1.185419
# 4  0.554094  0.000000  0.823630
# 5 -0.799200 -0.655324 -0.590763
# 6 -0.073688 -1.353579 -0.870911

print('不同行列填充不同的值')
data.loc[:4, 1] = NA
data.loc[:2, 2] = NA
print(data)
# Sample output:
#           0         1         2
# 0  1.289977       NaN       NaN
# 1  0.556263       NaN       NaN
# 2 -1.388250       NaN       NaN
# 3 -0.424846       NaN -1.120281
# 4 -0.885350       NaN  0.103914
# 5 -0.043046 -0.190838  2.351472
# 6  0.291554  0.837331  0.164307

# A dict gives a separate fill value per column: 0.5 for column 1 and -1
# for column 2.
print(data.fillna({1: 0.5, 2: -1}))
pandas的基本用法11-层次化索引
# -*- coding: utf-8 -*-
"""Hierarchical (multi-level) indexing on Series and DataFrame.

Restored from collapsed lines.  Replacements for calls that are gone from
current pandas: ``.ix`` -> ``.loc``, ``sortlevel()`` ->
``sort_index(level=...)``, ``sum(level=...)`` ->
``groupby(level=...).sum()``.  The ``'Gree'`` typo in the MultiIndex
example is corrected to ``'Green'``.
"""
import numpy as np
from pandas import Series, DataFrame, MultiIndex

print('Series的层次索引')
data = Series([1, 3, 56, 2, 88, 32, 43, 12, 65, 90],
              index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                     [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
print(data)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

print(data.index)
# [a 1, 2, 3, b 1, 2, 3, c 1, 2, d 2, 3]

# First two entries by position.
print(data.iloc[:2])
# a  1    1
#    2    3

# unstack() moves the inner level to the columns; missing combinations
# become NaN.
print(data.unstack())
#     1   2    3
# a   1   3   56
# b   2  88   32
# c  43  12  NaN
# d NaN  65   90

# stack() is the inverse.  dropna() ensures the NaN cells introduced by
# unstack() are removed whether or not stack() drops them itself.
round_trip = data.unstack().stack().dropna()
print(round_trip)
# a  1     1
#    2     3
#    3    56
# b  1     2
#    2    88
#    3    32
# c  1    43
#    2    12
# d  2    65
#    3    90

print('DataFrame的层次索引')
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=[['Ohio', 'Ohio', 'Colorado'],
                           ['Green', 'Red', 'Green']])
print(frame)
#      Ohio     Colorado
#     Green Red    Green
# a 1     0   1        2
#   2     3   4        5
# b 1     6   7        8
#   2     9  10       11

frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
print(frame)
# state      Ohio     Colorado
# color     Green Red    Green
# key1 key2
# a    1        0   1        2
#      2        3   4        5
# b    1        6   7        8
#      2        9  10       11

# A tuple addresses one row across both index levels; the trailing `:`
# makes it explicit that the tuple is a row key.
print(frame.loc[('a', 1), :])
# state     color
# Ohio      Green    0
#           Red      1
# Colorado  Green    2

print(frame.loc[('a', 2), :]['Colorado'])
# color
# Green    5

print(frame.loc[('a', 2), :]['Ohio']['Red'])
# 4

print('直接用MultiIndex创建层次索引结构')
print(MultiIndex.from_arrays([['Ohio', 'Ohio', 'Colorado'],
                              ['Green', 'Red', 'Green']],
                             names=['state', 'color']))

print('索引层级交换')
frame_swapped = frame.swaplevel('key1', 'key2')
print(frame_swapped)
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
# 2    a        3   4        5
# 1    b        6   7        8
# 2    b        9  10       11

# Levels can also be given by position; this swaps them back.
print(frame_swapped.swaplevel(0, 1))
# state      Ohio     Colorado
# color     Green Red    Green
# key1 key2
# a    1        0   1        2
#      2        3   4        5
# b    1        6   7        8
#      2        9  10       11

print('根据索引排序')
print(frame.sort_index(level='key2'))
# state      Ohio     Colorado
# color     Green Red    Green
# key1 key2
# a    1        0   1        2
# b    1        6   7        8
# a    2        3   4        5
# b    2        9  10       11

print(frame.swaplevel(0, 1).sort_index(level=0))
# state      Ohio     Colorado
# color     Green Red    Green
# key2 key1
# 1    a        0   1        2
#      b        6   7        8
# 2    a        3   4        5
#      b        9  10       11

print('根据指定的key计算统计信息')
# Sum the rows that share the same 'key2' value.
key2_totals = frame.groupby(level='key2').sum()
print(key2_totals)
# state  Ohio     Colorado
# color Green Red    Green
# key2
# 1         6   8       10
# 2        12  14       16

print('使用列生成层次索引')
frame = DataFrame({'a': list(range(7)),
                   'b': list(range(7, 0, -1)),
                   'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
                   'd': [0, 1, 2, 0, 1, 2, 3]})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

print(frame.set_index(['c', 'd']))  # columns c and d become the index
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

print(frame.set_index(['c', 'd'], drop=False))  # c and d also stay as columns
#        a  b    c  d
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

frame2 = frame.set_index(['c', 'd'])
print(frame2.reset_index())  # turn the index levels back into columns
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1
pandas的基本用法12-整数索引
# -*- coding: utf-8 -*-
"""Integer indexes: why positional access should go through ``.iloc``.

Restored from a single collapsed line.  The bare ``except:`` is narrowed
to the lookup errors the example is about (which also makes the ``sys``
import unnecessary), and the positional reads use ``.iloc`` so they do
not depend on how ``[]`` treats integers.
"""
import numpy as np
from pandas import Series, DataFrame

print('整数索引')
ser = Series(np.arange(3.))
print(ser)
try:
    # Ambiguous: with an integer index, -1 is looked up as a LABEL, and no
    # such label exists.
    print(ser[-1])
except (KeyError, IndexError) as exc:
    # Show the exception class.
    print(type(exc))

ser2 = Series(np.arange(3.), index=['a', 'b', 'c'])
# Last element by position.
print(ser2.iloc[-1])

ser3 = Series(range(3), index=[-5, 1, 3])
print(ser3.iloc[2])  # avoids the ambiguity of a bare [2]
print('')

print('对DataFrame使用整数索引')
frame = DataFrame(np.arange(6).reshape((3, 2)), index=[2, 0, 1])
print(frame)
# First row by position (its label is 2, not 0).
print(frame.iloc[0])
# Second column by position, all rows.
print(frame.iloc[:, 1])
阅读全文
1 0
- pandas 基础
- pandas基础
- pandas 基础
- Pandas基础
- Pandas基础
- pandas基础
- pandas基础
- pandas 基础
- pandas基础
- pandas 基础
- pandas基础
- Pandas学习笔记:pandas基础
- Python Pandas基础1
- Python pandas基础2
- python pandas基础3
- pandas 基础1
- python pandas库基础
- Pandas数据分析基础
- spring配置redis的单元测试(spring-test,junit)
- 关于SpringMVC中org.springframework.beans.factory.BeanCreationException: Could not autowire field:的问题的解决
- 从数据库中提取数据并注入到对象中
- highchars demo
- JavaScript中的逻辑运算符(笔记整理)
- pandas基础
- 钱币兑换问题
- HTTP长连接和短链接
- C#和NewSQL更配 —— CockroachDB入门(可能是C#下的全网首发)
- Immutable 详解及 React 中实践
- 求二叉树的深度
- 李文星事件谁该反思?
- HDU1213-How Many Tables (并查集)
- SELinux状态