QQ 聊天记录词频查询的 Python 实现

来源:互联网 发布:英雄无敌 mac 10.13 编辑:程序博客网 时间:2024/05/16 02:04

为了给后期聊天机器人提供大量的聊天词汇写了这个脚本,感兴趣的朋友可以看一下

下面是代码:

# -*- coding: utf-8 -*-
"""Count character n-gram frequencies per speaker in an exported chat log.

The log is a UTF-8 text file in which a line containing a participant's
nickname marks the start of that participant's messages; every following
line that contains no nickname is attributed to that participant.

The code runs on both Python 2 and Python 3.
"""
import io
import re
from operator import itemgetter


def getWordRate(name, path, requestL):
    """Count every ``requestL``-character substring written by each person.

    Args:
        name: The participants' nicknames, indexable by ``0 .. len(name)-1``
            (a list, or a dict keyed by those integers).
        path: Path of the UTF-8 encoded chat log (a leading BOM is accepted).
        requestL: Length, in characters, of the substrings to count.

    Returns:
        A dict mapping each index ``i`` of ``name`` to a dict that maps a
        substring to the number of times it occurs in the lines attributed
        to ``name[i]``. Every index is present; when ``requestL`` is less
        than 1 all inner dicts are empty, since no substring has that length.

    Raises:
        IOError / OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    names = [name[i] for i in range(len(name))]
    counts = {i: {} for i in range(len(names))}
    if requestL < 1:
        return counts

    # Nicknames are matched literally: escaping keeps characters such as
    # "(" or "*" in a nickname from being interpreted as regex syntax.
    patterns = [re.compile(re.escape(n)) for n in names]

    # Indices of ``names`` that receive the counts of the current message
    # line. Empty until the first header line has been seen.
    active = []

    with io.open(path, 'r', encoding='utf-8-sig') as chat:
        for raw in chat:
            # Drop the line terminator so it never becomes part of an n-gram.
            line = raw.rstrip(u'\r\n')

            matched = [n for n, pat in zip(names, patterns) if pat.search(line)]
            if matched:
                # Header line. When several nicknames occur in it, the one
                # listed last in ``name`` wins. A nickname entered more than
                # once is credited under each of its indices.
                speaker = matched[-1]
                active = [i for i, n in enumerate(names) if n == speaker]
                continue

            if not active:
                continue

            for start in range(len(line) - requestL + 1):
                key = line[start:start + requestL]
                for i in active:
                    table = counts[i]
                    table[key] = table.get(key, 0) + 1

    return counts


def _read_line():
    """Read one line from stdin and return it as text on Python 2 and 3."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    text = reader()
    if isinstance(text, bytes):
        # Python 2 hands back bytes; the script works with text throughout.
        text = text.decode('utf-8')
    return text


def main():
    """Interactively ask for a log file, the nicknames and the queries."""
    print(u'请输入聊天地址的路径')
    path = _read_line()
    print(u'请输入你需要查找的总人数')
    num = int(_read_line())
    print(u'请分别输入他们的名字')
    name = []
    for i in range(num):
        print(u'%s %d' % (u'姓名', i + 1))
        name.append(_read_line())

    while True:
        print(u'请输入你想查询词频的长度')
        requestL = int(_read_line())
        print(u'请输入你想查询的对象姓名')
        requestName = _read_line()
        print(u'查询中。。。')
        tables = getWordRate(name, path, requestL)

        if requestName in name:
            # Most frequent substrings first.
            ranked = sorted(tables[name.index(requestName)].items(),
                            key=itemgetter(1), reverse=True)
            for key, count in ranked:
                print(u'%s %s %s' % (key, u'------------------->', count))
        else:
            # An unknown name used to end in a KeyError; report it instead.
            print(u'没有找到该姓名')

        print(u'打印完毕,如果想继续查询输入yes')
        if _read_line() != u'yes':
            break


if __name__ == '__main__':
    main()
先将 QQ 聊天记录导出到目标文件夹,注意导出格式应选择 txt。

path就填这个txt的名字 (如123.txt)

注意:输入的人数必须涵盖聊天记录中出现过的所有昵称,这也是本程序很不智能的地方。比如这是 a 和 b 的聊天记录,但 a 之前的昵称叫做 c,那么人数应输入 3,并分别输入 a、b、c 三个名字。


1 0
原创粉丝点击