《利用Python进行数据分析》笔记---第10章时间序列

来源:互联网 发布:周芷若结局 知乎 编辑:程序博客网 时间:2024/06/05 17:46

写在前面的话:

实例中的所有数据都是在GitHub上下载的,打包下载即可。
地址是:http://github.com/pydata/pydata-book

还有一定要说明的:

我使用的是 Python 2.7。书中的部分代码有错误,下面的代码已在我自己的 2.7 环境中调试通过。

# coding: utf-8
# Chapter 10 (time series) walkthrough, restored to one statement per line.
#
# Bare expressions (e.g. ``now`` or ``ts``) are notebook-style inspections:
# they display a value in an interactive session and have no effect when the
# file is run as a plain script.
#
# NOTE(review): the calls below use the pandas API of the release the author
# ran under Python 2.7 (``resample(..., how=..., fill_method=...)``, ``.ix``,
# ``pd.rolling_mean`` / ``pd.rolling_std``). Later pandas releases appear to
# have deprecated or removed these -- confirm before running on a newer pandas.
from __future__ import division
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn
import numpy as np
import matplotlib.pyplot as plt
pd.options.display.max_rows = 12
np.set_printoptions(precision=4, suppress=True)
plt.rc('figure', figsize=(12, 4))

# --- Standard-library dates and times ---------------------------------------
from datetime import datetime
now = datetime.now()
now
now.year, now.month, now.day
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta
delta.days
delta.seconds
from datetime import timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2 * timedelta(12)

# --- Converting between strings and datetimes -------------------------------
stamp = datetime(2011, 1, 3)
str(stamp)
stamp.strftime('%Y-%m-%d')
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]
from dateutil.parser import parse
parse('2011-01-03')
parse('Jan 31, 1997 10:45 PM')
parse('6/12/2011', dayfirst=True)
datestrs
pd.to_datetime(datestrs)
idx = pd.to_datetime(datestrs + [None])
idx
idx[2]
pd.isnull(idx)

# --- Series indexed by datetimes --------------------------------------------
from datetime import datetime
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),
         datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = Series(np.random.randn(6), index=dates)
ts
type(ts)
ts.index
ts + ts[::2]
ts.index.dtype
stamp = ts.index[0]
stamp

# --- Indexing, selection and slicing ----------------------------------------
stamp = ts.index[2]
ts[stamp]
ts['1/10/2011']
ts['20110110']
longer_ts = Series(np.random.randn(1000),
                   index=pd.date_range('1/1/2000', periods=1000))
longer_ts
longer_ts['2001']
longer_ts['2001-05']
ts[datetime(2011, 1, 7):]
ts
ts['1/6/2011':'1/11/2011']
ts.truncate(after='1/9/2011')
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = DataFrame(np.random.randn(100, 4),
                    index=dates,
                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])
long_df.ix['5-2001']

# --- Duplicate index entries ------------------------------------------------
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',
                          '1/3/2000'])
dup_ts = Series(np.arange(5), index=dates)
dup_ts
dup_ts.index.is_unique
dup_ts['1/3/2000']
dup_ts['1/2/2000']
grouped = dup_ts.groupby(level=0)
grouped.mean()
grouped.count()

# --- Date ranges, frequencies and shifting ----------------------------------
ts
ts.resample('D')
index = pd.date_range('4/1/2012', '6/1/2012')
index
pd.date_range(start='4/1/2012', periods=20)
pd.date_range(end='6/1/2012', periods=20)
pd.date_range('1/1/2000', '12/1/2000', freq='BM')
pd.date_range('5/2/2012 12:56:31', periods=5)
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)
from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour
four_hours = Hour(4)
four_hours
pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')
Hour(2) + Minute(30)
pd.date_range('1/1/2000', periods=10, freq='1h30min')
rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')
list(rng)
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
ts.shift(2)
ts.shift(-2)
ts / ts.shift(1) - 1
ts.shift(2, freq='M')
ts.shift(3, freq='D')
ts.shift(1, freq='3D')
ts.shift(1, freq='90T')
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()
now + MonthEnd()
now + MonthEnd(2)
offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)
ts = Series(np.random.randn(20),
            index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts.groupby(offset.rollforward).mean()
ts.resample('M', how='mean')

# --- Time zone handling -----------------------------------------------------
import pytz
pytz.common_timezones[-5:]
tz = pytz.timezone('US/Eastern')
tz
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts.index.tz)
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')
ts_utc = ts.tz_localize('UTC')
ts_utc
ts_utc.index
ts_utc.tz_convert('US/Eastern')
ts_eastern = ts.tz_localize('US/Eastern')
ts_eastern.tz_convert('UTC')
ts_eastern.tz_convert('Europe/Berlin')
ts.index.tz_localize('Asia/Shanghai')
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('US/Eastern')
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow
stamp_utc.value
stamp_utc.tz_convert('US/Eastern').value
from pandas.tseries.offsets import Hour
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp
stamp + Hour()
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp
stamp + 2 * Hour()
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
ts = Series(np.random.randn(len(rng)), index=rng)
ts
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
result = ts1 + ts2
result.index

# --- Periods and period arithmetic ------------------------------------------
p = pd.Period(2007, freq='A-DEC')
p
p + 5
p - 2
pd.Period('2014', freq='A-DEC') - p
rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')
rng
Series(np.random.randn(6), index=rng)
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(values, freq='Q-DEC')
index

# --- Period frequency conversion --------------------------------------------
p = pd.Period('2007', freq='A-DEC')
p.asfreq('M', how='start')
p.asfreq('M', how='end')
p = pd.Period('2007', freq='A-JUN')
p.asfreq('M', 'start')
p.asfreq('M', 'end')
p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts
ts.asfreq('M', how='start')
ts.asfreq('B', how='end')

# --- Quarterly periods ------------------------------------------------------
p = pd.Period('2012Q4', freq='Q-JAN')
p
p.asfreq('D', 'start')
p.asfreq('D', 'end')
# Last business day of the quarter, stepped back one day, converted to its
# first minute, then advanced by 16 * 60 minutes.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = Series(np.arange(len(rng)), index=rng)
ts
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
ts.index = new_rng.to_timestamp()
ts

# --- Converting between timestamps and periods ------------------------------
rng = pd.date_range('1/1/2000', periods=3, freq='M')
ts = Series(randn(3), index=rng)
pts = ts.to_period()
ts
pts
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(randn(6), index=rng)
ts2.to_period('M')
pts = ts.to_period()
pts
pts.to_timestamp(how='end')

# --- Building a PeriodIndex from year/quarter columns -----------------------
# Raw string: same path value as the original literal, without relying on
# backslash sequences that merely happen not to be escape codes.
data = pd.read_csv(r'D:\Source Code\pydata-book-master\ch08\macrodata.csv')
data.year
data.quarter
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')
index
data.index = index
data.infl

# --- Resampling and frequency conversion ------------------------------------
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = Series(randn(len(rng)), index=rng)
ts.resample('M', how='mean')
ts.resample('M', how='mean', kind='period')
rng = pd.date_range('1/1/2000', periods=12, freq='T')
ts = Series(np.arange(12), index=rng)
ts
ts.resample('5min', how='sum')
ts.resample('5min', how='sum', closed='left')
ts.resample('5min', how='sum', closed='left', label='left')
ts.resample('5min', how='sum', loffset='-1s')
ts.resample('5min', how='ohlc')
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = Series(np.arange(100), index=rng)
ts.groupby(lambda x: x.month).mean()
ts.groupby(lambda x: x.weekday).mean()

# --- Upsampling and interpolation -------------------------------------------
frame = DataFrame(np.random.randn(2, 4),
                  index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame
df_daily = frame.resample('D')
df_daily
frame.resample('D', fill_method='ffill')
frame.resample('D', fill_method='ffill', limit=2)
frame.resample('W-THU', fill_method='ffill')

# --- Resampling with periods ------------------------------------------------
frame = DataFrame(np.random.randn(24, 4),
                  index=pd.period_range('1-2000', '12-2001', freq='M'),
                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame[:5]
annual_frame = frame.resample('A-DEC', how='mean')
annual_frame
annual_frame.resample('Q-DEC', fill_method='ffill')
annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')
annual_frame.resample('Q-MAR', fill_method='ffill')

# --- Time series plotting and moving-window functions -----------------------
close_px_all = pd.read_csv(
    r'D:\Source Code\pydata-book-master\ch09\stock_px.csv',
    parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px = close_px.resample('B', fill_method='ffill')
close_px.info()
close_px['AAPL'].plot()
close_px.ix['2009'].plot()
close_px['AAPL'].ix['01-2011':'03-2011'].plot()
appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')
appl_q.ix['2009':].plot()
close_px = close_px.asfreq('B').fillna(method='ffill')
close_px.AAPL.plot()
pd.rolling_mean(close_px.AAPL, 250).plot()
plt.figure()
appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)
appl_std250[5:12]
appl_std250.plot()
阅读全文
2 0