python---实战3统计特定时间次数(文件操作、替换、列表、排序、去重、元组、集合)

来源:互联网 发布:下载微博软件 编辑:程序博客网 时间:2024/06/05 16:03

1、该文本里,有多少个2012年11月发布的tweets。
目标文件:百度网盘
http://pan.baidu.com/s/1kU6X2GB

所求问题为:该文本里,有多少个2012年11月发布的tweets。 (要求:输出为一个整数。提示:请阅读python的time模块)

python源码:

root@kali:~/python/laowangpy/datadig# cat 3numberof201211.py #!/usr/bin/python# --*-- coding:utf-8 --*--import stringimport timetext_userdata = open('twitterdata.txt').read()#对目标文本进行全部读取,并赋值#print text_userdatal = [n for n in text_userdata.split('\r\n')]#对目标文件使用\r\n进行分割,把分割好的每个元素加入列表n中print l[0]#打印第一个元素信息i = 0n = []m = []while l[i] != "":#当列表l中的元素不等于空白,则进入    s = l[i].replace('"','')#对列表l中每个元素中的"符号使用空白替换    n.append(s)#加入列表n中    t = s.split(",")#对列表s使用,分割    m.append(t[6])#对列表t中第6个元素加入列表m中    f = open("test.txt","a")#打开文件test.txt    f.write(m[i])#对列表m每个元素写入文件test.txt中    f.close()    i = i + 1#循环加1print m[0]j = 0yeararry = []timestamparry = []while m[j] != m[-1]:#对列表m循环元素时不等于最后一个呀元素是,则进入    timearry = time.strptime(m[j], "%Y-%m-%d %H:%M:%S")#对列表m每个元素转换成时间格式    timestamp = int(time.mktime(timearry))#对时间格式元素转成时间戳    timestamparry.append(timestamp)#对没有转成时间戳元素加入列表timestamparry    year = time.localtime(timestamparry[j])[0]#对每个时间戳的元素取出第一个年份的值    month = time.localtime(timestamparry[j])[1]#对每个时间戳的元素取出第二个月份的值    yeararry.append(str(year)+str(month))#对取出的整型年份值与整型月份值,强制转成字符型,再组合,再加入列表yeararry    j = j + 1print [(x,yeararry.count(x)) for x in set(yeararry)]#对列表yeararry中每个元素去重统计出现频次统计root@kali:~/python/laowangpy/datadig# 

脚本运行结果:

root@kali:~/python/laowangpy/datadig# python 3numberof201211.py "264345016313466880","28803555","てんじょう","","RT@h_ototake:はるかぜちゃんが殺人予告とも取れるツイートを受け、親御さんが警察に通報した。それを受けて、「だから小学生にネットなんかやらせるから…」という感想を漏らしている人が多いことに驚く。「退場すべきは、いじめられた側だ」というわけか。これでは、いじめ...","","2012-11-02 12:35:37","web","26597","","152963467","乙武 洋匡","","はるかぜちゃんが殺人予告とも取れるツイートを受け、親御さんが警察に通報した。それを受けて、「だから小学生にネットなんかやらせるから…」という感想を漏らしている人が多いことに驚く。「退場すべきは、いじめられた側だ」というわけか。これでは、いじめがはびこるわけだ。","","26597","","","264341471132528640","","264341471132528640","264345016313466880","","","","","","","","","","","","https://twitter.com/h_ototake/status/264341471132528640","https://twitter.com/mtenjo/status/264345016313466880"2012-11-02 12:35:37[('201411', 4), ('201412', 2), ('201312', 1), ('201311', 19), ('201310', 2), ('201210', 4), ('201211', 124)]root@kali:~/python/laowangpy/datadig# 

test.txt文件保存结果:

root@kali:~/python/laowangpy/datadig# cat test.txt 2012-11-02 12:35:372012-11-02 12:35:552012-11-02 13:02:582012-11-02 13:05:382012-11-02 13:07:302012-11-03 00:30:132012-11-03 01:34:142012-11-03 03:02:062012-11-04 04:17:552012-11-04 05:19:132012-11-04 06:18:202012-11-04 06:19:052012-11-04 06:22:332012-11-04 06:23:212012-11-02 00:26:102012-11-02 00:26:592012-11-02 00:30:212012-11-02 11:44:442012-11-02 13:59:462012-11-03 14:41:052012-11-03 21:43:112012-11-04 10:43:142012-11-03 22:00:092012-11-03 22:00:562012-11-03 22:02:252012-11-03 22:03:182012-11-03 22:05:572012-11-03 22:06:442012-11-03 22:07:252012-11-03 22:08:422012-11-03 22:09:282012-11-03 22:11:382012-11-04 05:07:292012-11-04 05:07:592012-11-03 03:11:192012-11-03 05:03:212012-11-03 12:57:092012-11-03 13:01:272012-11-03 13:03:162012-11-04 10:43:382012-11-04 10:45:352012-11-04 10:48:452012-11-04 06:25:562012-11-04 06:26:452012-11-04 06:27:042012-11-04 07:13:432012-11-04 07:19:532012-11-04 07:32:502012-11-01 17:36:522012-11-02 16:03:472012-11-02 16:04:392012-11-02 16:10:012012-11-02 16:38:572012-11-02 16:40:592012-11-02 16:44:172012-11-03 19:10:112012-11-03 19:11:042012-11-03 19:11:292012-11-03 19:27:312012-11-03 20:39:012012-11-04 00:39:292012-11-04 02:08:092012-11-04 02:08:492012-11-04 02:09:212012-11-04 02:10:002012-11-04 04:39:262012-11-04 08:39:142012-11-04 09:52:282012-11-04 09:53:092012-11-04 10:01:082012-11-04 05:13:072012-11-04 05:15:492012-11-03 11:21:232012-11-03 11:24:152012-11-03 11:31:142012-11-03 12:22:162012-11-03 13:39:292012-11-03 13:40:042012-11-03 13:57:542012-11-03 14:24:462012-11-03 15:03:112012-11-03 19:09:132012-11-03 20:23:572012-10-29 10:10:062012-10-30 06:41:592012-10-30 06:43:302012-10-30 06:45:332012-10-30 09:19:522012-11-01 02:26:282012-11-01 05:47:412012-11-02 07:25:332012-11-02 07:28:152012-11-02 07:48:132012-11-02 07:49:232012-11-03 02:54:542012-11-03 02:59:332012-11-03 23:54:512012-11-03 23:55:092012-11-04 00:25:462012-11-04 00:26:512012-11-04 00:37:452012-11-04 00:44:172012-11-04 01:00:042012-11-04 02:17:512012-11-04 03:33:482012-11-04 04:06:392012-11-04 05:21:152012-11-04 05:23:192012-11-04 05:25:582012-11-04 05:27:052012-11-04 05:28:392012-11-04 02:02:562012-11-04 02:39:212012-11-04 03:02:572012-11-04 03:39:402012-11-04 04:02:482012-11-04 04:39:032012-11-04 04:39:382012-11-04 05:03:002012-11-03 11:29:132012-11-03 12:15:042012-11-03 13:11:162012-11-03 13:15:272012-11-03 15:08:462012-11-03 15:17:552012-11-03 15:19:222012-11-03 15:34:222012-11-04 00:49:032012-11-04 00:52:562012-11-04 04:58:452012-11-03 18:53:522012-11-03 19:25:232012-11-03 20:25:202012-11-03 20:54:012012-11-03 21:22:102012-11-03 21:50:232012-11-03 23:23:072012-11-04 00:09:352012-11-04 00:56:422012-11-04 01:43:112012-11-03 10:38:432012-11-03 11:01:342012-11-03 12:17:262012-11-03 12:54:052012-11-03 16:46:282012-11-03 16:47:412012-11-03 16:48:252012-11-03 22:27:122012-11-03 22:28:142012-11-03 22:30:352012-11-03 22:32:012012-11-03 22:41:062012-11-03 22:42:182012-11-03 22:51:322012-11-04 08:36:512012-11-04 08:45:282012-11-04 09:00:29root@kali:~/python/laowangpy/datadig# 

python时间模块操作

时间戳生成与取特定的年份与月份值

将字符串的时间转换为时间戳    方法:        a = "2013-10-10 23:40:00"        将其转换为时间数组        import time        timeArray = time.strptime(a, "%Y-%m-%d %H:%M:%S")    转换为时间戳:    timeStamp = int(time.mktime(timeArray))    timeStamp == 1381419600
root@kali:~/python/laowangpy/datadig# pythonPython 2.7.3 (default, Mar 14 2014, 11:57:14) [GCC 4.7.2] on linux2Type "help", "copyright", "credits" or "license" for more information.>>> import time >>> print time.time()1508336495.91>>> a = '2011-11-12 23:40:45'>>> timearry =time.strptime(a,"%Y-%m-%d %H:%M:%S")>>> print timearrytime.struct_time(tm_year=2011, tm_mon=11, tm_mday=12, tm_hour=23, tm_min=40, tm_sec=45, tm_wday=5, tm_yday=316, tm_isdst=-1)>>> print timearrytime.struct_time(tm_year=2011, tm_mon=11, tm_mday=12, tm_hour=23, tm_min=40, tm_sec=45, tm_wday=5, tm_yday=316, tm_isdst=-1)>>> timestamp = int(time.mktime(timearry))>>> print timestamp1321112445>>> time.ctime()'Wed Oct 18 22:41:39 2017'>>> time.ctime(1321112445)'Sat Nov 12 23:40:45 2011'>>> time.gmtime()time.struct_time(tm_year=2017, tm_mon=10, tm_mday=18, tm_hour=14, tm_min=42, tm_sec=26, tm_wday=2, tm_yday=291, tm_isdst=0)>>> time.gmtime(1321112445)time.struct_time(tm_year=2011, tm_mon=11, tm_mday=12, tm_hour=15, tm_min=40, tm_sec=45, tm_wday=5, tm_yday=316, tm_isdst=0)>>> time.localtime()time.struct_time(tm_year=2017, tm_mon=10, tm_mday=18, tm_hour=22, tm_min=43, tm_sec=0, tm_wday=2, tm_yday=291, tm_isdst=0)>>> time.localtime(1321112445)time.struct_time(tm_year=2011, tm_mon=11, tm_mday=12, tm_hour=23, tm_min=40, tm_sec=45, tm_wday=5, tm_yday=316, tm_isdst=0)>>> >>> print time.localtime(1321112445)[0]2011>>> print time.localtime(1321112445)[1]11>>> print time.localtime(1321112445)[0]2011>>>

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2、有哪几天的数据

目标文件:百度网盘
http://pan.baidu.com/s/1kU6X2GB

所求问题为:该文本里,有哪几天的数据? (要求:输出为一个list,例:[‘2012-03-04’,’2012-03-05’])
python源码:

root@kali:~/python/laowangpy/datadig# cat 4adddaysdata.py #!/usr/bin/python# --*-- coding:utf-8 --*--import stringimport timetext_userdata = open('twitterdata.txt').read()#对目标文本进行全部读取,并赋值#print text_userdatal = [n for n in text_userdata.split('\r\n')]#对目标文件使用\r\n进行分割,把分割好的每个元素加入列表n中#print l[0]#打印第一个元素信息i = 0n = []m = []while l[i] != "":#当列表l中的元素不等于空白,则进入    s = l[i].replace('"','')#对列表l中每个元素中的"符号使用空白替换    n.append(s)#加入列表n中    t = s.split(",")#对列表s使用,分割    m.append(t[6])#对列表t中第6个元素加入列表m中    f = open("test.txt","a")#打开文件test.txt    f.write(m[i])#对列表m每个元素写入文件test.txt中    f.close()    i = i + 1#循环加1#print m[0]#打印列表m的第一个元素j = 0yeararry = []timestamparry = []while m[j] != m[-1]:#对列表m循环元素时不等于最后一个呀元素是,则进入    timearry = time.strptime(m[j], "%Y-%m-%d %H:%M:%S")#对列表m每个元素转换成时间格式    timestamp = int(time.mktime(timearry))#对时间格式元素转成时间戳    timestamparry.append(timestamp)#对没有转成时间戳元素加入列表timestamparry    year = time.localtime(timestamparry[j])[0]#对每个时间戳的元素取出第一个年份的值    month = time.localtime(timestamparry[j])[1]#对每个时间戳的元素取出第二个月份的值    day = time.localtime(timestamparry[j])[2]#对每个时间戳的元素取出第三个天数的值    yeararry.append(str(year)+"-"+str(month)+"-"+str(day))#对取出的整型年份值、整型月份值和整型天数值,强制转成字符型,再组合,再加入列表yeararry    j = j + 1fredata = [(x,yeararry.count(x)) for x in set(yeararry)]#对列表yeararry中每个元素出现频次统计print fredataprint "--------------------------按出现的频率统计如下:--------------------------------"sortfredata = sorted(fredata)#对列表fredata转换成集合sortfredataprint sorted(sortfredata,key=lambda x:x[1],reverse=True)#对集合sortfredata中使用第二个key值大小,并从大到小排序root@kali:~/python/laowangpy/datadig# 

Python运行情况:((‘2013-10-29’, 1)中的数字1是在该文本日志中出现的频次)

root@kali:~/python/laowangpy/datadig# python 4adddaysdata.py [('2014-11-3', 3), ('2013-10-29', 1), ('2014-11-4', 1), ('2013-11-3', 10), ('2013-11-2', 1), ('2013-11-4', 8), ('2014-12-1', 1), ('2013-10-4', 1), ('2012-10-30', 4), ('2013-12-4', 1), ('2012-11-4', 48), ('2014-12-3', 1), ('2012-11-1', 2), ('2012-11-2', 19), ('2012-11-3', 55)]--------------------------按出现的频次的高低排序统计如下:--------------------------------[('2012-11-3', 55), ('2012-11-4', 48), ('2012-11-2', 19), ('2013-11-3', 10), ('2013-11-4', 8), ('2012-10-30', 4), ('2014-11-3', 3), ('2012-11-1', 2), ('2013-10-29', 1), ('2013-10-4', 1), ('2013-11-2', 1), ('2013-12-4', 1), ('2014-11-4', 1), ('2014-12-1', 1), ('2014-12-3', 1)]root@kali:~/python/laowangpy/datadig# 

——————————————————————————————————————————————
3、哪个小时发布的数据最多

目标文件:百度网盘
http://pan.baidu.com/s/1kU6X2GB

所求问题为:该文本里,在哪个小时发布的数据最多? (要求:输出一个整数。)
python源码:

root@kali:~/python/laowangpy/datadig# cat 5whichhours.py #!/usr/bin/python# --*-- coding:utf-8 --*--import stringimport timetext_userdata = open('twitterdata.txt').read()#对目标文本进行全部读取,并赋值#print text_userdatal = [n for n in text_userdata.split('\r\n')]#对目标文件使用\r\n进行分割,把分割好的每个元素加入列表n中#print l[0]#打印第一个元素信息i = 0n = []m = []while l[i] != "":#当列表l中的元素不等于空白,则进入    s = l[i].replace('"','')#对列表l中每个元素中的"符号使用空白替换    n.append(s)#加入列表n中    t = s.split(",")#对列表s使用,分割    m.append(t[6])#对列表t中第6个元素加入列表m中    f = open("test.txt","a")#打开文件test.txt    f.write(m[i])#对列表m每个元素写入文件test.txt中    f.close()    i = i + 1#循环加1#print m[0]#打印列表m的第一个元素j = 0yeararry = []timestamparry = []while m[j] != m[-1]:#对列表m循环元素时不等于最后一个呀元素是,则进入    timearry = time.strptime(m[j], "%Y-%m-%d %H:%M:%S")#对列表m每个元素转换成时间格式    timestamp = int(time.mktime(timearry))#对时间格式元素转成时间戳    timestamparry.append(timestamp)#对没有转成时间戳元素加入列表timestamparry    #year = time.localtime(timestamparry[j])[0]#对每个时间戳的元素取出第一个年份的值    #month = time.localtime(timestamparry[j])[1]#对每个时间戳的元素取出第二个月份的值    #day = time.localtime(timestamparry[j])[2]#对每个时间戳的元素取出第三个天数的值    hour = time.localtime(timestamparry[j])[3]#对每个时间戳的元素取出第四个小时的值    yeararry.append(str(hour))#对取出的整型小时数值,强制转成字符型,再组合,再加入列表yeararry    j = j + 1fredata = [(x,yeararry.count(x)) for x in set(yeararry)]#对列表yeararry中每个元素出现频次统计print fredataprint "--------------------------按出现的频次高低排序统计如下:--------------------------------"sortfredata = sorted(fredata)#对列表fredata转换成集合sortfredataprint sorted(sortfredata,key=lambda x:x[1],reverse=True)#对集合sortfredata中使用第二个key值大小,并从大到小排序root@kali:~/python/laowangpy/datadig# 

python运行情况:((‘22’, 17)中第一数字22为22点的时间值,第二个数字17为在该文本中出现17次)

root@kali:~/python/laowangpy/datadig# python 5whichhours.py [('20', 4), ('21', 3), ('22', 17), ('23', 3), ('1', 3), ('0', 13), ('3', 5), ('2', 10), ('5', 13), ('4', 7), ('7', 7), ('6', 10), ('9', 3), ('8', 3), ('11', 6), ('10', 7), ('13', 11), ('12', 7), ('15', 5), ('14', 2), ('17', 1), ('16', 9), ('19', 6), ('18', 1)]--------------------------按出现的频次高低排序统计如下:--------------------------------[('22', 17), ('0', 13), ('5', 13), ('13', 11), ('2', 10), ('6', 10), ('16', 9), ('10', 7), ('12', 7), ('4', 7), ('7', 7), ('11', 6), ('19', 6), ('15', 5), ('3', 5), ('20', 4), ('1', 3), ('21', 3), ('23', 3), ('8', 3), ('9', 3), ('14', 2), ('17', 1), ('18', 1)]root@kali:~/python/laowangpy/datadig# 
阅读全文
0 0