第十章 时间序列

来源:互联网 发布:sql server win7 编辑:程序博客网 时间:2024/05/22 00:48


# Time-series basics: datetime/timedelta arithmetic, string <-> datetime
# conversion, and pandas Series/DataFrame objects indexed by dates.
#
# Reconstructed from a pasted IPython session whose statements and outputs had
# been fused onto a few lines. Trailing "recorded:" comments hold the values
# shown in that session; anything derived from datetime.now() or
# np.random.randn() differs from run to run.

from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from dateutil.parser import parse
from pandas import DataFrame, Series

# --- datetime and timedelta --------------------------------------------------
now = datetime.now()
print(now)  # recorded: datetime.datetime(2017, 6, 28, 9, 15, 30, 916000)
print((now.year, now.month, now.day))  # recorded: (2017, 6, 28)

# Subtracting two datetimes yields a timedelta (days, seconds).
delta = datetime(2017, 6, 28) - datetime(2008, 6, 24, 8, 15)
print(delta)  # recorded: datetime.timedelta(3290, 56700)
print(delta.days)  # recorded: 3290
print(delta.seconds)  # recorded: 56700

delta = datetime(2017, 9, 2) - datetime(2017, 6, 28, 9, 26)
print(delta)  # recorded: datetime.timedelta(65, 52440)

delta = datetime(2017, 9, 2) - datetime(2017, 6, 28, 19, 26)
print(delta)  # recorded: datetime.timedelta(65, 16440)

# A timedelta (or a multiple of one) can be added to or subtracted from a
# datetime.
start = datetime(2017, 6, 28)
print(start + timedelta(65))  # recorded: datetime.datetime(2017, 9, 1, 0, 0)

# --- Converting between strings and datetimes --------------------------------
stamp = datetime(2017, 6, 28)
print(str(stamp))  # recorded: '2017-06-28 00:00:00'
print(stamp.strftime('%Y-%m-%d'))  # recorded: '2017-06-28'

value = '2017-06-28'
print(datetime.strptime(value, '%Y-%m-%d'))
# recorded: datetime.datetime(2017, 6, 28, 0, 0)

datestrs = ['7/6/2011', '8/6/2011']
print([datetime.strptime(x, '%m/%d/%Y') for x in datestrs])
# recorded: [datetime.datetime(2011, 7, 6, 0, 0),
#            datetime.datetime(2011, 8, 6, 0, 0)]

# dateutil guesses the format, so no explicit format string is needed.
print(parse('2017-06-28'))  # recorded: datetime.datetime(2017, 6, 28, 0, 0)
print(parse('Jun 28,2017 9:45 PM'))
# recorded: datetime.datetime(2017, 6, 28, 21, 45)

# Where the day is written before the month, pass dayfirst=True.
print(parse('6/12/2011', dayfirst=True))
# recorded: datetime.datetime(2011, 12, 6, 0, 0)

print(datestrs)  # recorded: ['7/6/2011', '8/6/2011']
print(pd.to_datetime(datestrs))
# recorded: DatetimeIndex(['2011-07-06', '2011-08-06'],
#                         dtype='datetime64[ns]', freq=None)

# A None entry is converted to NaT.
idx = pd.to_datetime(datestrs + [None])
print(idx)
# recorded: DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'],
#                         dtype='datetime64[ns]', freq=None)
print(idx[2])  # recorded: NaT

# NaT ("not a time") is pandas' missing value for timestamps.
print(pd.isnull(idx))  # recorded: array([False, False,  True], dtype=bool)

# --- Time series basics ------------------------------------------------------
dates = [
    datetime(2011, 1, 2),
    datetime(2011, 1, 5),
    datetime(2011, 1, 7),
    datetime(2011, 1, 8),
    datetime(2011, 1, 10),
    datetime(2011, 1, 12),
]
ts = Series(np.random.randn(6), index=dates)
print(ts)
# recorded (values are random):
#   2011-01-02    1.582904
#   2011-01-05   -1.439438
#   2011-01-07   -0.411849
#   2011-01-08    0.747215
#   2011-01-10    0.384336
#   2011-01-12    0.432144
#   dtype: float64

print(type(ts))  # recorded: pandas.core.series.Series
print(ts.index)
# recorded: DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07',
#                          '2011-01-08', '2011-01-10', '2011-01-12'],
#                         dtype='datetime64[ns]', freq=None)

# Arithmetic aligns on the dates: ts[::2] keeps every second row, so the
# dates it lacks come out as NaN in the sum.
print(ts + ts[::2])
# recorded:
#   2011-01-02    3.165807
#   2011-01-05         NaN
#   2011-01-07   -0.823697
#   2011-01-08         NaN
#   2011-01-10    0.768672
#   2011-01-12         NaN
#   dtype: float64

# The scalar values of a DatetimeIndex are Timestamp objects.
stamp = ts.index[0]
print(stamp)  # recorded: Timestamp('2011-01-02 00:00:00')

# --- Indexing, selection, subsetting -----------------------------------------
# type(ts) above is a plain Series, so label-based indexing and selection
# work the same way as for any other Series.
stamp = ts.index[2]
print(ts[stamp])  # recorded: -0.41184861375211623

# A string that can be interpreted as a date also works as a label.
print(ts['1/10/2011'])  # recorded: 0.38433616480100646

# A longer daily series. For series like this, passing just a year or a
# year-month selects the matching slice of data.
longer_ts = Series(
    np.random.randn(1000),
    index=pd.date_range('1/1/2000', periods=1000),
)
print(longer_ts)
# recorded: 1000 random values from 2000-01-01 through 2002-09-26
#           (Freq: D, dtype: float64)

# Slicing with dates works just like slicing a regular Series.
print(ts[datetime(2011, 1, 7):])
# recorded: the rows for 2011-01-07, 2011-01-08, 2011-01-10, 2011-01-12

# The slice bounds do not have to be timestamps present in the index.
print(ts['1/6/2011':'1/11/2011'])
# recorded: the rows for 2011-01-07, 2011-01-08, 2011-01-10

print(ts.truncate(after='1/9/2011'))
# recorded: the rows for 2011-01-02, 2011-01-05, 2011-01-07, 2011-01-08

# The same selection rules apply to the rows of a DataFrame.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = DataFrame(
    np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
# The recorded session used long_df.ix['6-2001']; .ix has since been removed
# from pandas, so the label-based .loc indexer is used with an unambiguous
# year-month string.
print(long_df.loc['2001-06'])
# recorded: four rows (2001-06-06, 2001-06-13, 2001-06-20, 2001-06-27) with
#           columns Colorado, Texas, New York, Ohio








原创粉丝点击