Python pandas 的基本用法 09:统计函数

来源:互联网 发布:ubuntu 安装微信 编辑:程序博客网 时间:2024/06/05 15:46

pandas的基本用法09-统计函数

这里写图片描述

这里写图片描述

这里写图片描述

这里写图片描述

# -*- coding: utf-8 -*-
"""pandas basics 09: summary statistics, de-duplication and membership tests.

A flat tutorial script. Each section prints a heading followed by the result
of one pandas operation; the expected output is reproduced in the comment
below each call. Single-argument ``print(...)`` is used so the script is
valid on Python 3.
"""
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

print('求和')
df = DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
print(df)
#     one  two
# a  1.40  NaN
# b  7.10 -4.5
# c   NaN  NaN
# d  0.75 -1.3

# Sums each column by default; NaN entries are skipped.
column_sums = df.sum()
print(column_sums)
# one    9.25
# two   -5.80

# Sum across each row. min_count=1 keeps the all-NaN row 'c' as NaN;
# without it current pandas reports 0.0 for that row.
row_sums = df.sum(axis=1, min_count=1)
print(row_sums)
# a    1.40
# b    2.60
# c     NaN
# d   -0.55

# Index label of the largest value in each column.
print(df.idxmax())
# one    b
# two    d

# Running total down each column; NaN cells stay NaN but do not reset it.
print(df.cumsum())
#     one  two
# a  1.40  NaN
# b  8.50 -4.5
# c   NaN  NaN
# d  9.25 -5.8

print(df.describe())
#             one       two
# count  3.000000  2.000000
# mean   3.083333 -2.900000
# std    3.493685  2.262742
# min    0.750000 -4.500000
# 25%    1.075000 -3.700000
# 50%    1.400000 -2.900000
# 75%    4.250000 -2.100000
# max    7.100000 -1.300000

# --- Correlation and covariance (left disabled, as in the original) --------
# `web` is not imported anywhere in this script, so the section cannot run
# as written.
# NOTE(review): the start date '4/1/2016' is later than the end date
# '7/15/2015' -- the two look swapped; confirm before enabling.
# NOTE(review): `returns.volume` is not a column of `returns`; the `volume`
# frame built below was presumably intended -- confirm.
#
# print('相关性与协方差')
# all_data = {}
# for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
#     all_data[ticker] = web.get_data_yahoo(ticker, '4/1/2016', '7/15/2015')
# price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
# volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})
# returns = price.pct_change()
# # print(returns.tail())
# print(returns.MSFT.corr(returns.IBM))
# print(returns.corr())  # correlation; each series with itself is always 1
# print(returns.cov())   # covariance
# print(returns.corrwith(returns.IBM))
# print(returns.corrwith(returns.volume))

print('去重')
s = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
print(s.unique())
# ['c' 'a' 'd' 'b']

# Series.value_counts replaces the deprecated top-level pd.value_counts.
print(s.value_counts())
# c    3
# a    3
# b    2
# d    1

print('判断元素存在')
mask = s.isin(['a', 'b', 'c'])
print(mask)
# 0     True
# 1     True
# 2    False
# 3     True
# 4     True
# 5     True
# 6     True
# 7     True
# 8     True

# Boolean indexing keeps only the rows where the mask is True.
print(s[mask])
# 0    c
# 1    a
# 3    a
# 4    a
# 5    b
# 6    b
# 7    c
# 8    c

data = DataFrame({
    'c1': [1, 3, 4, 3, 4],
    'c2': [2, 3, 1, 2, 3],
    'c3': [1, 5, 2, 4, 4],
})
print(data)
#    c1  c2  c3
# 0   1   2   1
# 1   3   3   5
# 2   4   1   2
# 3   3   2   4
# 4   4   3   4

# Count how often each value occurs per column; values absent from a column
# come back as NaN and are replaced with 0 (so the counts print as floats).
column_counts = data.apply(pd.Series.value_counts).fillna(0)
print(column_counts)
#    c1  c2  c3
# 1   1   1   1
# 2   0   2   1
# 3   2   2   0
# 4   2   0   2
# 5   0   0   1

# Same count, taken per row instead of per column.
row_counts = data.apply(pd.Series.value_counts, axis=1).fillna(0)
print(row_counts)
#    1  2  3  4  5
# 0  2  1  0  0  0
# 1  0  0  2  0  1
# 2  1  1  0  1  0
# 3  0  1  1  1  0
# 4  0  0  1  2  0
原创粉丝点击