Python 统计 PV/UV(二)

来源:互联网 发布:nginx伪静态规则写法 编辑:程序博客网 时间:2024/06/06 15:54

test.py

#!/usr/bin/python
"""Count page views (PV) and unique visitors (UV) per key from a CSV log."""
import os
import sys
import string
import json

# The input log is expected to have been produced beforehand with:
#   awk -F ',' 'substr($0,21,2)=='14'{print $0}' * > 14.log

# Default hour window: the 13-character 'YYYY-MM-DD HH' prefixes for
# 2015-04-29 00 through 2015-04-29 08.
_DEFAULT_HOURS = frozenset('2015-04-29 %02d' % hour for hour in range(9))


def _count_pv_uv(lines, hours):
    """Aggregate PV/UV counters per key over an iterable of log lines.

    A line is counted only when all of the following hold:
      * its first 13 characters are a member of ``hours``;
      * splitting it on ',' yields exactly 7 fields;
      * field 2 and field 5 are both non-empty (the original code refers
        to them as "log id" and "setkey").

    Returns a dict mapping field 5 to ``{'pv': int, 'uvSet': set}``, where
    ``pv`` is the number of counted lines for that key and ``uvSet`` holds
    the distinct field-2 values seen for it.
    """
    stats = {}
    for line in lines:
        if line[0:13] not in hours:
            continue
        fields = line.split(',')
        if len(fields) != 7:
            continue
        visitor, key = fields[2], fields[5]
        if not (visitor and key):
            continue
        entry = stats.setdefault(key, {'pv': 0, 'uvSet': set()})
        entry['pv'] += 1
        entry['uvSet'].add(visitor)
    return stats


def run(logfile='14.log', hours=None):
    """Print one ``<key> pv:<n> uv:<m>`` line per key found in ``logfile``.

    Args:
        logfile: path of the comma-separated log to read. Defaults to
            '14.log', the file the original script always used.
        hours: container of 13-character 'YYYY-MM-DD HH' prefixes; only
            lines starting with one of them are counted. ``None`` selects
            the default window 2015-04-29 00..08.

    Raises:
        SystemExit: with status 1, after printing an error and a hint to
            stdout, when ``logfile`` does not exist.
    """
    if not os.path.exists(logfile):
        print('error:' + logfile + ' not existed.')
        print("hint:awk -F ',' 'substr($0,21,2)=='14'{print $0}' WB_LOG_LOAD* > 14.log")
        # sys.exit instead of the interactive-only exit() helper; non-zero
        # status so shell callers can detect the failure.
        sys.exit(1)

    if hours is None:
        hours = _DEFAULT_HOURS

    # The context manager closes the file even if parsing raises.
    with open(logfile, 'r') as log:
        stats = _count_pv_uv(log, hours)

    for key in stats:
        entry = stats[key]
        print(key + ' pv:' + str(entry['pv']) + ' uv:' + str(len(entry['uvSet'])))



main.py

#!/usr/bin/python
import os
import sys
import test

# Script entry point: runs the PV/UV report implemented in test.run().
if __name__ == '__main__':
    # An optional first command-line argument is still parsed as an integer
    # (default 20), so a non-numeric argument raises ValueError. The value
    # was meant for an earlier run(num) call and is not passed on.
    num = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    test.run()




0 0
原创粉丝点击