Python log-processing script template

For example, the log lines to be processed look like this:
2016-08-03 15:35:06,749 MainThread web.py:1946 INFO 200 GET /update_proxy?source=ctripRoundFlight&proxy=110.214.72.53:8123&error=22&speed=0.642103910446 (10.10.141.231) 0.40ms
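The fields of interest are the source, proxy, error and speed query parameters carried by each request line. As a minimal sketch (Python 2, reusing the same log_pat regular expression that the script below defines, and the sample line above), extracting them looks like this:

# Minimal sketch: pull the four fields out of the sample line shown above.
import re

log_pat = re.compile(r'update_proxy\?source=(.*?)&proxy=(.*?)&error=(.*?)&speed=(.*?) ')

sample = ('2016-08-03 15:35:06,749 MainThread web.py:1946 INFO 200 GET '
          '/update_proxy?source=ctripRoundFlight&proxy=110.214.72.53:8123'
          '&error=22&speed=0.642103910446 (10.10.141.231) 0.40ms')

match = log_pat.search(sample)
if match:
    source_name, proxy_string, error_code, speed = match.groups()
    print source_name   # ctripRoundFlight
    print proxy_string  # 110.214.72.53:8123
    print error_code    # 22
    print speed         # 0.642103910446

The complete script template follows.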
# -*- coding: utf-8 -*-

import sys
sys.path.append('/home/workspace/ProxyServer/bin')
import os
import time
import datetime
#import db_local as db
from DBHandle import DBHandle
import re
import json
#from send_mail import send_mail


# Pulls source, proxy, error code and speed out of each request line.
log_pat = re.compile(r'update_proxy\?source=(.*?)&proxy=(.*?)&error=(.*?)&speed=(.*?) ')

log_dir = '/search/log/proxy_new'

def get_filename():
    """
    Collect the log files that should be processed at the current time.
    """
    all_files = os.listdir(log_dir)
    files = []
    for each_file in all_files:
        # File names look like proxyYYYYMMDD_HH.log; each_file[5:16] is the
        # YYYYMMDD_HH timestamp.
        if re.match(r'proxy.*_\d\d\.log', each_file) and os.path.isfile(log_dir + '/' + each_file):
            file_time = datetime.datetime.strptime(each_file[5:16], '%Y%m%d_%H')
            # Only pick files that are at least one hour old, i.e. already complete.
            if file_time < datetime.datetime.now() - datetime.timedelta(hours=1):
                files.append(log_dir + '/' + each_file)

    return files

def generate_statistics(files):
    """
    Read the pre-filtered files and build per-source statistics.
    """

    stat_dict = dict()
    # Newest files first, so the most recent records are collected first.
    files = sorted(files, reverse=True)
    for file_name in files:
        # Pre-filter with grep: keep only the update_proxy request lines.
        shell_code = 'grep "update_proxy?source" %s > /search/log/statistic_log/temp_proxy_log' % file_name
        os.system(shell_code)
        with open('/search/log/statistic_log/temp_proxy_log') as f:
            content_list = f.readlines()
            for each_content in content_list[::-1]:
                try:
                    log_content = log_pat.search(each_content).groups()
                    source_name, proxy_string, error_code, speed = log_content
                    print log_content
                    # Keep only entries that actually carry an ip:port proxy.
                    if '.' in proxy_string and ':' in proxy_string:
                        proxy_ip = proxy_string
                    else:
                        continue

                    # Strip the business suffix so e.g. ctripRoundFlight and
                    # ctripHotel are both counted under the same source.
                    source = source_name
                    for typ in ['Car', 'Bus', 'MultiFlight', 'multiFlight', 'RoundFlight', 'roundFlight', 'Flight', 'Rail', 'ListHotel', 'listHotel', 'Hotel']:
                        if source_name.endswith(typ):
                            source = re.sub(typ + '$', '', source_name)
                            break

                    # Record at most 20 (error_code, YYYYMMDD_HH) pairs per proxy;
                    # file_name[27:38] is the timestamp part of the file name.
                    stat_dict.setdefault(source, {})
                    stat_dict[source].setdefault(proxy_ip, [])
                    if len(stat_dict[source][proxy_ip]) < 20:
                        stat_dict[source][proxy_ip].append((int(error_code), file_name[27:38]))
                except Exception:
                    continue
    return stat_dict

def stat_log():
    """
    The whole log-statistics flow.
    """

    files = get_filename()
    stat_dict = generate_statistics(files)

    for source, dic in stat_dict.items():
        print source + ':'
        for proxy_ip, error_list in dic.items():
            success = 0
            for error in error_list:
                if error[0] == 0:
                    success += 1
            # Report a proxy only if more than half of its recorded requests
            # succeeded (error code 0), with its error history and time span.
            if success * 1.0 / len(error_list) > 0.5:
                print proxy_ip, [error[0] for error in error_list[::-1]], error_list[-1][1] + '--' + error_list[0][1]


if __name__ == '__main__':
    stat_log()
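As a rough usage sketch with hypothetical in-memory data (the real script builds stat_dict from the log files via generate_statistics), the reporting rule in stat_log keeps a proxy only when more than half of its recorded requests returned error code 0:

# Rough sketch of the stat_log filter, run on hypothetical in-memory data.
# Each value is a list of (error_code, 'YYYYMMDD_HH') tuples, newest first,
# which is exactly the shape generate_statistics builds.
stat_dict = {
    'ctrip': {
        '110.214.72.53:8123': [(0, '20160803_15'), (0, '20160803_14'), (22, '20160803_13')],
        '10.0.0.1:3128':      [(22, '20160803_15'), (0, '20160803_14')],
    },
}

for source, dic in stat_dict.items():
    print source + ':'
    for proxy_ip, error_list in dic.items():
        success = sum(1 for error in error_list if error[0] == 0)
        if success * 1.0 / len(error_list) > 0.5:
            # Error codes oldest-to-newest, plus the covered time span.
            print proxy_ip, [e[0] for e in error_list[::-1]], error_list[-1][1] + '--' + error_list[0][1]

With this toy data only 110.214.72.53:8123 is printed, since its success ratio is 2/3, while the other proxy sits at exactly 0.5 and is filtered out.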
