python 这段分析日志的代码，测试几百行的文件没问题。但是分析大日志文件时 CPU 100%，而且结果有问题。求大神指点。

来源:互联网 发布:蚁群算法matlab程序 编辑:程序博客网 时间:2024/04/28 10:24
# -*- coding:utf-8 -*-
#! /bin/env python

import operator
import os
import re
import string
import sys
import time
from collections import Counter

# --- Module-level setup -------------------------------------------------
# NOTE(review): '%Y-%d-%m' formats as year-DAY-month; '%Y-%m-%d' was
# probably intended -- confirm before changing the report filename format.
times = time.strftime('%Y-%d-%m')
Pwd = '/tmp/data'
# Log file to analyse; closed at the end of main().
openfile = open("%s/blog" % Pwd, "r")
dict = {}      # url -> cumulative time formatted "%.4f" (shadows the builtin; name kept for existing callers)
all_dict = []  # one url entry per matched log line, used for frequency counting
# Pattern pieces: capture the quoted request ("...") and the trailing
# floating-point response time at end of line. Raw strings avoid relying
# on Python passing unrecognized escapes through unchanged.
re1 = r'^.*?'
url = r'\"(.*?)\"'
re2 = r'.*?'
time_re = r'(\d*\.\d*)$'  # renamed: the original name 'time' shadowed the time module
rg = re.compile(re1 + url + re2 + time_re, re.IGNORECASE | re.DOTALL)

def start(self):
    """Scan an iterable of log lines and accumulate per-URL response time.

    Side effects on module globals:
      * dict     -- maps url -> total time, formatted "%.4f"
      * all_dict -- gets the url appended once per matching line (for counting)
    """
    for line in self:
        m = rg.search(line)
        if not m:
            continue
        urls = m.group(1)
        times = m.group(2)
        # 'urls in dict' is an O(1) hash lookup; the original tested
        # 'in dict.keys()', which in Python 2 builds and scans a list on
        # every line -- O(n^2) overall, hence 100% CPU on large logs.
        if urls in dict:
            # Accumulate onto the stored total. The original computed
            # float(times) + float(times) (the new value added to itself),
            # discarding the previously accumulated time.
            ti = float(dict[urls]) + float(times)
            dict[urls] = "%.4f" % ti
        else:
            # (The original also called dict.fromkeys(...) here and threw
            # the result away -- a no-op, removed.)
            dict[urls] = "%.4f" % float(times)
        all_dict.append(urls)

def get_url(self):
    """Return the 10 most frequent items of *self* as (item, count) pairs.

    Also stores the result in the module global ``a`` (kept because the
    existing main() reads it).
    """
    # Counter counts everything in one O(n) pass; the original called
    # list.count() once per unique item -- O(n*k), a major CPU sink on
    # large logs. most_common() already sorts by count, descending,
    # replacing the Py2-only dict.iteritems() + sorted() combination.
    global a
    a = Counter(self).most_common(10)
    return a
def end_url_time(x, y):
    """Compute each URL's average response time and rank them.

    x -- list of (url, hit_count) pairs (the output of get_url)
    y -- mapping url -> total time as a string (the module-global 'dict')

    Stores (and returns, for convenience/testing) the module global
    ``dicts``: a list of (url, "avg") pairs, highest average first.
    """
    end_dict = {}
    for test_url, hits in x:
        total = y.get(test_url)
        avg = float(total) / float(hits)
        end_dict[test_url] = "%.4f" % avg
    global dicts
    # Sort on the numeric value. The original sorted the formatted
    # strings lexicographically, so "9.0000" ranked above "10.0000";
    # .items() also replaces the Py2-only .iteritems().
    dicts = sorted(end_dict.items(), key=lambda kv: float(kv[1]), reverse=True)
    return dicts

def main():
    """Drive the analysis: parse the log, rank URLs, write the report."""
    start(openfile)
    get_url(all_dict)
    end_url_time(a, dict)
    # 'with' guarantees the report file is closed even if a write fails;
    # the original also shadowed the builtin name 'file'.
    with open("/tmp/data/%s" % times, "a+") as report:
        for entry in dicts:
            report.write("%s\n" % str(entry))
    openfile.close()

if __name__ == '__main__':
    main()