使用Python分析谷歌浏览器Chrome的历史记录

来源:互联网 发布:球球大作战java源代码 编辑:程序博客网 时间:2024/05/17 18:27

首先安装 matplotlib 包:

pip install matplotlib
完整代码如下:

# Analyze Google Chrome's browsing history: count visits per domain and either
# print the counts to the console or plot them as a bar chart.
#
# The code is written to run on both Python 2.7 and Python 3.
import operator
import os
import sqlite3
import sys
from collections import Counter, OrderedDict

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional; the console listing still works without matplotlib.
    plt = None

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

# One row per recorded visit, so counting rows per domain counts visits.
SELECT_STATEMENT = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"


def parse(url):
    """Return the host part of *url* with a leading ``www.`` removed.

    The host is the text between the first ``//`` and the next ``/``.
    If *url* contains no ``//`` (for example ``about:blank``), the message
    "URL format error!" is printed and ``None`` is returned.
    """
    _scheme, separator, remainder = url.partition('//')
    if not separator:
        print("URL format error!")
        return None
    domain = remainder.split('/', 1)[0]
    # Strip only a leading "www."; replacing it anywhere in the host would
    # corrupt names such as "mywww.example.com".
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return domain


def count_domains(rows):
    """Count how many rows belong to each domain.

    rows: iterable of ``(url, visit_count)`` tuples; only the URL is used.
    Returns an ``OrderedDict`` mapping domain -> number of rows, ordered from
    the most to the least frequent domain. URLs that ``parse`` cannot handle
    are skipped instead of being counted under ``None``.
    """
    tally = Counter()
    for url, _visit_count in rows:
        domain = parse(url)
        if domain is None:
            continue
        tally[domain] += 1
    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    return OrderedDict(ranked)


def analyze(results):
    """Ask the user how to present *results*, then print or plot them.

    results: mapping of domain -> count, already in display order.
    Typing ``c`` prints one "domain count" line per entry, ``p`` shows a bar
    chart, and any other answer prints a notice and exits the program.
    """
    prompt = _read_line("[.] Type <c> to print or <p> to plot\n[>] ")
    if prompt == "c":
        # Use the argument, not a module-level global, so the function works
        # for any mapping it is given.
        for site, count in results.items():
            print("%s %s" % (site, count))
    elif prompt == "p":
        if plt is None:
            print("[.] matplotlib is not installed; run: pip install matplotlib")
            return
        positions = list(range(len(results)))
        # Dict views are not sequences on Python 3, so materialize them.
        plt.bar(positions, list(results.values()), align='edge')
        # Set labels and rotation in one call so the rotation applies to the
        # labels that are actually displayed.
        plt.xticks(positions, list(results.keys()), rotation=45)
        plt.show()
    else:
        print("[.] Uh?")
        sys.exit()


def main():
    """Read Chrome's history database and present per-domain visit counts."""
    # Path to the user's Chrome profile (Windows layout). os.path.join avoids
    # backslash escapes such as "\U", which are invalid in Python 3 literals.
    data_path = os.path.join(
        os.path.expanduser('~'),
        "AppData", "Local", "Google", "Chrome", "User Data", "Default",
    )
    history_db = os.path.join(data_path, 'history')
    # sqlite3.connect would silently create an empty database if the file
    # were missing, so check first.
    if not os.path.isfile(history_db):
        sys.exit("[.] Chrome history database not found: %s" % history_db)

    # NOTE(review): Chrome may keep this file locked while it is running;
    # close the browser if sqlite3 reports "database is locked".
    connection = sqlite3.connect(history_db)
    try:
        cursor = connection.cursor()
        cursor.execute(SELECT_STATEMENT)
        rows = cursor.fetchall()  # list of (url, visit_count) tuples
    finally:
        connection.close()

    analyze(count_domains(rows))


if __name__ == "__main__":
    main()

实现的效果
a、统计图
这里写图片描述
b、数据的统计
这里写图片描述

参考地址:http://mp.weixin.qq.com/s?__biz=MjM5NzU0MzU0Nw==&mid=2651370207&idx=1&sn=0edfb3ea20f1eefe06bd910bf2e9018a&mpshare=1&scene=24&srcid=1117HhAxrzIaIieGgQEmif8T#rd

0 0
原创粉丝点击