apache日志文件处理成sqlite数据库

来源：互联网 | 发布：与全世界做生意知乎 | 编辑：程序博客网 | 时间：2024/06/04 20:18
# -*- coding: utf-8 -*-
"""Load an Apache "combined" access log into a SQLite database.

Every line of ``access.log`` is parsed with a regular expression and stored
as one row of the ``APACHE_LOG`` table in ``apache.sqlite``.

The code deliberately sticks to syntax that runs on both Python 2 and 3,
because the original script was written for Python 2.
"""
import calendar
import re
import sqlite3
import sys
import time

# Number of lines parsed so far by process(); doubles as the next ``eventno``.
# main() resets it to 0 before a run.
count = 0

_DB_PATH = 'apache.sqlite'

_CREATE_TABLE_SQL = '''CREATE TABLE IF NOT EXISTS APACHE_LOG(
    eventno INTEGER NOT NULL,
    eventtype TEXT NOT NULL,
    sipaddr TEXT NOT NULL,
    identd TEXT NOT NULL,
    userid TEXT NOT NULL,
    timestamp INTEGER NOT NULL,
    timestampusec INTEGER,
    httpreq TEXT NOT NULL,
    httpstatus INTEGER NOT NULL,
    contentlength INTEGER NOT NULL,
    referer TEXT NOT NULL,
    useragent TEXT NOT NULL
)'''

# Placeholders instead of string formatting: log fields are untrusted input
# and routinely contain single quotes, which used to break the statement.
_INSERT_SQL = '''INSERT INTO APACHE_LOG
    (eventno, eventtype, sipaddr, identd, userid, timestamp, timestampusec,
     httpreq, httpstatus, contentlength, referer, useragent)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''

# The log carries no sub-second part.  The original script always wrote the
# literal '1.0' here, which SQLite's INTEGER affinity stores as 1; the same
# value is kept so existing databases stay consistent.
_TIMESTAMP_USEC = 1

# host identd user [timestamp] "request" status size "referer" "user-agent"
# The first three fields accept any non-blank token so that IPv6 addresses
# and hostnames are parsed too.  Inside the quoted fields ``\"`` matches a
# bare double quote, so those groups accept any characters; the surrounding
# structure (status, size, closing quote at end of line) anchors them.
_LOG_LINE_RE = re.compile(
    r'^(\S+)\s(\S+)\s(\S+)\s(\[[^\[\]]+\])\s'
    r'"((?:[^"]|\")+)"\s(\d{3})\s(\d+|-)\s'
    r'"((?:[^"]|\")+)"\s"((?:[^"]|\")+)"$'
)

_UTC_OFFSET_RE = re.compile(r'^([+-])(\d{2})(\d{2})$')


def _to_epoch(timestamp):
    """Convert ``[10/Oct/2000:13:55:36 -0700]`` to integer Unix time.

    The UTC offset in the log entry is honoured, so the result does not
    depend on the timezone of the machine running the import.  When the
    offset is missing or unreadable the local timezone is assumed, which is
    what the original script always did.  Numeric input is passed through.
    """
    if isinstance(timestamp, (int, float)):
        return int(timestamp)
    fields = timestamp.strip().strip('[]').split()
    parsed = time.strptime(fields[0], '%d/%b/%Y:%H:%M:%S')
    offset = _UTC_OFFSET_RE.match(fields[1]) if len(fields) > 1 else None
    if offset is None:
        return int(time.mktime(parsed))
    sign, hours, minutes = offset.groups()
    seconds = int(hours) * 3600 + int(minutes) * 60
    if sign == '-':
        seconds = -seconds
    return calendar.timegm(parsed) - seconds


def _as_int(value):
    """Return ``int(value)`` when possible, otherwise ``value`` unchanged."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return value


def _ensure_schema(db):
    """Create the ``APACHE_LOG`` table on ``db`` if it does not exist yet."""
    db.execute(_CREATE_TABLE_SQL)


def _insert_row(db, record):
    """Insert one parsed record into ``APACHE_LOG`` without committing."""
    size = record['contentlength']
    db.execute(_INSERT_SQL, (
        record['eventno'],
        record['eventtype'],
        record['sipaddr'],
        record['identd'],
        record['userid'],
        _to_epoch(record['timestamp']),
        _TIMESTAMP_USEC,
        record['httpreq'],
        _as_int(record['httpstatus']),
        # In Common Log Format '-' means that no bytes were sent.
        0 if size == '-' else _as_int(size),
        record['referer'],
        record['useragent'],
    ))


def insert_db(parse_list, db=None):
    """Store one parsed log record in the ``APACHE_LOG`` table.

    Args:
        parse_list: dict as returned by process().  It is not modified.
        db: optional open ``sqlite3`` connection.  When omitted, a connection
            to ``apache.sqlite`` is opened, committed and closed by this
            call.  When supplied, the row is inserted but not committed, so
            the caller can batch many inserts into one transaction.
    """
    owns_connection = db is None
    if owns_connection:
        db = sqlite3.connect(_DB_PATH)
    try:
        _ensure_schema(db)
        _insert_row(db, parse_list)
        if owns_connection:
            db.commit()
    finally:
        if owns_connection:
            db.close()


def process(line):
    """Parse one combined-format log line into a dict of string fields.

    The module-level ``count`` supplies ``eventno`` and is incremented after
    every successfully parsed line.

    Raises:
        ValueError: if ``line`` is not in Apache combined log format.
    """
    global count
    match = _LOG_LINE_RE.search(line.rstrip('\r\n'))
    if match is None:
        raise ValueError('not an Apache combined log line: %r' % (line,))
    (host, identd, userid, timestamp, request,
     status, size, referer, useragent) = match.groups()
    line_dic = dict(
        eventno=count,
        eventtype='APACHE_COMBINED',
        sipaddr=host,
        identd=identd,
        userid=userid,
        timestamp=timestamp,
        httpreq=request,
        httpstatus=status,
        contentlength=size,
        referer=referer,
        useragent=useragent,
    )
    count = count + 1
    return line_dic


def parse(log_file, db=None):
    """Parse every line of ``log_file`` and insert it into the database.

    Blank lines are ignored and malformed lines are reported on stderr and
    skipped, so a single bad entry no longer aborts the whole import.  All
    rows go through one connection and are committed once at the end.

    Args:
        log_file: iterable of text lines, e.g. an open file.
        db: optional open ``sqlite3`` connection; when omitted,
            ``apache.sqlite`` is opened and closed by this call.
    """
    owns_connection = db is None
    if owns_connection:
        db = sqlite3.connect(_DB_PATH)
    try:
        _ensure_schema(db)
        for line in log_file:
            if not line.strip():
                continue
            try:
                record = process(line)
            except ValueError as exc:
                sys.stderr.write('skipping line: %s\n' % (exc,))
                continue
            _insert_row(db, record)
        db.commit()
    finally:
        if owns_connection:
            db.close()


def main():
    """Import ``access.log`` and print the number of lines stored."""
    global count
    count = 0
    filename = r'access.log'
    with open(filename) as apache_log_file:
        parse(apache_log_file)
    print(count)


if __name__ == '__main__':
    main()

# Result:
1 0