Python字典使用--词频统计的GUI实现

来源：互联网 | 发布：dct算法进行图像压缩 | 编辑：程序博客网 | 时间：2024/05/18 13:30

第一步：输入文章

第二步：建立用于词频计算的空字典

第三步：对文本的每一行计算词频。如果文章不长，则不需要一次读一行，可以一次性全部读入。

第四步：从字典中获取数据对到列表中

第五步：交换列表中每个数据对内两个元素的位置（变为“词频, 单词”），并按词频从大到小进行排序

第六步：输出结果

下图所示为程序的运行输出以及写入文件的统计结果

汉字的词频统计、排除特定词集合的程序后续更新...

# Word-frequency counter (command-line version).

# Running total of words seen by processLine() since the module was loaded.
count = 0

# Characters treated as word separators.  The double quote and '!' are
# listed explicitly: writing the literal as "...\'""" silently drops the
# double quote, because Python parses that as two adjacent string literals.
_PUNCTUATION = "~@#$%^&*()_-+=<>?/,.:;{}[]|'\"!"
_PUNCT_TABLE = str.maketrans(_PUNCTUATION, " " * len(_PUNCTUATION))


def processLine(line, wordCounts):
    """Add the words of one line of text to *wordCounts*.

    Punctuation is replaced by spaces, the line is split on whitespace and
    every resulting word increments its entry in *wordCounts* (a dict that
    is mutated in place).  The module-level ``count`` is increased by the
    number of words found.
    """
    global count
    words = replacePunctuations(line).split()
    count += len(words)
    for word in words:
        wordCounts[word] = wordCounts.get(word, 0) + 1


def replacePunctuations(line):
    """Return *line* with every punctuation character replaced by a space."""
    return line.translate(_PUNCT_TABLE)


def main():
    """Ask for a file name, count its words and write a frequency report.

    The five most frequent words are printed; the full report is written
    to '词频统计结果.txt' in the current directory.
    """
    # Ask the user for the file to analyse.
    filename1 = input("输入待统计文本名:").strip()

    # Empty dict that accumulates word -> occurrences.
    wordCounts = {}
    with open(filename1, "r") as infile:
        for line in infile:
            processLine(line.lower(), wordCounts)

    # (occurrences, word) pairs, highest frequency first; ties are ordered
    # by word in descending order.
    items = sorted(((n, word) for word, n in wordCounts.items()), reverse=True)

    # Show the top five; slicing avoids an IndexError on texts with fewer
    # than five distinct words.
    for n, word in items[:5]:
        print(word + "\t" + str(n))

    # Total for this run, taken from the dict so that it does not include
    # words counted by earlier processLine() calls.
    total = sum(wordCounts.values())

    # Write the report to a text file.
    lines = [
        '单词种类：', str(len(items)), '\n',
        '单词总数：', str(total), '\n',
        '词频排序如下:\n',
        'word\tcounts\n',
    ]
    lines.extend('\t'.join([word, str(n)]) + '\n' for n, word in items)
    with open('词频统计结果.txt', "w") as outfile:
        outfile.writelines(lines)
    print('\n统计完成！\n')


# Run main() only when executed as a script.
if __name__ == '__main__':
    main()

上面程序的精简版本：

# Compact word-frequency counter for 'hamlet.txt'.

# Characters replaced by a space before the text is split into words.
# Raw string: the set contains a literal backslash followed by '^'.
PUNCTUATION = r'!"#$%&*()+,.-;:<=>?@[]\^_{}|`'


def normalizeText(txt):
    """Return *txt* lower-cased with every PUNCTUATION character replaced by a space."""
    txt = txt.lower()
    for ch in PUNCTUATION:
        txt = txt.replace(ch, ' ')
    return txt


def getText(path='hamlet.txt'):
    """Read the file at *path* and return its normalized text.

    The file is closed as soon as it has been read.
    """
    with open(path, 'r') as f:
        return normalizeText(f.read())


def countWords(txt):
    """Return ``(word, occurrences)`` pairs for *txt*, most frequent first.

    Words are the whitespace-separated tokens of *txt*.  Words with equal
    frequency keep the order in which they first appear.
    """
    counts = {}
    for word in txt.split():
        counts[word] = counts.get(word, 0) + 1
    return sorted(counts.items(), key=lambda x: x[1], reverse=True)


def buildReport(items):
    """Return the report as a list of strings for ``writelines``.

    *items* is a sequence of ``(word, occurrences)`` pairs.  The total is
    the sum of all occurrences, and each row is written as word then count
    so that it lines up with the 'word<TAB>counts' header.
    """
    total = sum(n for _, n in items)
    lines = [
        '单词种类：', str(len(items)), '\n',
        '单词总数：', str(total), '\n',
        '词频排序如下:\n',
        'word\tcounts\n',
    ]
    lines.extend('\t'.join([str(word), str(n)]) + '\n' for word, n in items)
    return lines


def main():
    """Print the ten most frequent words and write the full report to disk."""
    items = countWords(getText())

    # Slicing copes with texts that have fewer than ten distinct words.
    for word, count in items[:10]:
        print('{0:<10}{1:>5}'.format(word, count))

    # Write the report to a text file.
    with open('词频统计结果.txt', "w") as outfile:
        outfile.writelines(buildReport(items))
    print('\n统计完成！\n')


if __name__ == '__main__':
    main()

使用GUI实现的截图如下：

GUI程序代码如下：

import os
import tkinter as tk
from tkinter import ttk
from tkinter import scrolledtext
from tkinter import filedialog
from tkinter import messagebox as mBox

# Name of the report file created inside the directory the user picks.
RESULT_FILENAME = '词频统计结果.txt'

# Characters replaced by a space before the text is split into words.
# Raw string: the set contains a literal backslash followed by '^'.
PUNCTUATION = r'!"#$%&*()+,.-;:<=>?@[]\^_{}|`'


def getText(DIR):
    """Return the whole content of the text file at path *DIR*."""
    with open(DIR, 'r') as f:
        return f.read()


def _showSource(fname):
    """Replace the content of the source pane with the text of file *fname*."""
    Txt = getText(fname)
    srcText.delete('1.0', tk.END)  # replace, do not append to, earlier content
    srcText.insert(tk.END, Txt)
    srcText.update()


def __opendir():
    """Button callback: pick a file in a dialog and show its text."""
    srcText.delete('1.0', tk.END)  # clear the pane first
    # Open the file-selection dialog.
    fname = filedialog.askopenfilename(filetypes=(("Text file", "*.txt*"), ("HTML files", "*.html;*.htm")))
    entryvar.set(fname)  # setting entryvar also updates the Entry widget
    if not fname:
        mBox.showwarning('警告', message='未选择文件夹！')  # warning dialog
        return  # nothing was chosen, so there is no file to read
    # Show the text that will be analysed.
    _showSource(fname)


def srcEnter(event=None):
    """<Return> handler of the path entry: load the file typed by the user."""
    fname = DirEntry.get()
    if not fname:
        mBox.showwarning('警告', message='请选择文件！')  # warning dialog
        return
    _showSource(fname)


def wordFrequence():
    """Button callback: count the words of the selected file.

    The result pane is cleared and filled with the number of distinct
    words, the total number of words and one 'word / count' row per word,
    most frequent first.
    """
    fname = DirEntry.get()
    if not fname:
        mBox.showwarning('警告', message='请选择文件！')  # warning dialog
        return

    # Lower-case the text and turn punctuation into spaces.
    txt = getText(fname).lower()
    for ch in PUNCTUATION:
        txt = txt.replace(ch, ' ')

    # Count the words; an empty dict collects word -> occurrences.
    words = txt.split()
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1

    # Sort by frequency, highest first.
    items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

    # The total is simply the number of tokens that were counted.
    report = [
        '单词种类：', str(len(items)), '\n',
        '单词总数：', str(len(words)), '\n',
        '词频排序如下:\n',
        '#word:\t\t#counts:\n',
    ]
    report.extend(word + '\t\t' + str(count) + '\n' for word, count in items)

    # Replace any earlier result so that repeated clicks do not pile up.
    dstText.delete('1.0', tk.END)
    dstText.insert(tk.END, ''.join(report))


def _saveResult(dirname):
    """Write the content of the result pane to RESULT_FILENAME in *dirname*."""
    if not dirname:
        mBox.showwarning('警告', message='请选择保存位置！')  # warning dialog
        return
    # os.path.join uses the separator of the platform the program runs on.
    fname = os.path.join(dirname, RESULT_FILENAME)
    with open(fname, "w") as outfile:
        outfile.write(dstText.get('1.0', tk.END))
    mBox.showinfo('词频统计', '统计结果保存成功！')


def savefile():
    """Button callback: pick a directory in a dialog and save the result there."""
    # Open the directory-selection dialog.
    dirname = filedialog.askdirectory()
    outvar.set(dirname)  # setting outvar also updates the Entry widget
    _saveResult(dirname)


def dstEnter(event=None):
    """<Return> handler of the output entry: save to the directory typed by the user."""
    _saveResult(outvar.get())


# Create instance
win = tk.Tk()
# Add a title
win.title("词频统计GUI")
# Disable resizing the GUI
win.resizable(0, 0)

# --------------- Widgets ------------------ #
# Button that opens the file-selection dialog
SelDirButton = ttk.Button(win, command=__opendir, text='选择文件目录：')
SelDirButton.grid(row=0, column=0, sticky=tk.W, pady=3, padx=3)

# Entry showing the path of the selected file
entryvar = tk.StringVar()
DirEntry = ttk.Entry(win, width=30, textvariable=entryvar)
DirEntry.grid(row=1, column=0, sticky=tk.W, pady=3, padx=3)
DirEntry.bind('<Return>', func=srcEnter)

# Pane showing the content of the file
srcText = scrolledtext.ScrolledText(win, width=30, height=30)
srcText.grid(row=2, column=0, columnspan=1, sticky=tk.W, pady=3, padx=3)

# Button that starts the word count
CalcuButton = ttk.Button(win, command=wordFrequence, text='词频统计')
CalcuButton.grid(row=0, column=1, sticky=tk.W, pady=3, padx=3)

# Pane showing the result
dstText = scrolledtext.ScrolledText(win, width=30, height=30)
dstText.grid(row=2, column=1, columnspan=2, sticky=tk.W, pady=3, padx=3)

# Button that saves the result
SavefileButton = ttk.Button(win, command=savefile, text='统计结果保存到：')
SavefileButton.grid(row=0, column=2, sticky=tk.W, pady=3, padx=3)

# Entry showing the output directory
outvar = tk.StringVar()
saveEntry = ttk.Entry(win, width=30, textvariable=outvar)
saveEntry.grid(row=1, column=1, columnspan=2, sticky=tk.W, pady=3, padx=3)
saveEntry.bind('<Return>', func=dstEnter)

# ======================
# Start GUI
# ======================
win.mainloop()


                                                     0        0           	
					
					   Python字典使用--词频统计的GUI实现
	  	   使用Python+NLTK实现英文单词词频统计
	  	   使用Python+NLTK实现英文单词词频统计
	  	   【Python基础】1.2.1 字典统计词频
	  	   python统计词频的方法
	  	   字典实例：统计词频
	  	   使用python的map和reduce统计词频
	  	   词频统计的C++实现（使用stl--map）
	  	   Python实现中文小说词频统计
	  	   统计文章词频（python实现）
	  	   python代码简单实现一个词频统计
	  	   Python起步之字典案例（词频统计）
	  	   Python使用Hadoop进行词频统计
	  	   Trie树（字典树）实现词频统计或前缀匹配类型的问题
	  	   字典树应用——词频统计 （C++实现）
	  	   python--更干净的词频统计
	  	   词频统计:字典树+dfs
	  	   python 统计词频
	     		  
	  	   2017-2-18  POJ6377
	  	   Redis 集群规范
	  	   Miller_Robin素数判定和Pollard_rho质因数分解模板
	  	   [讨论] PADS PCB功能使用技巧系列 —— 如何走差分线？
	  	   Redis事务（transaction）
	  	   Python字典使用--词频统计的GUI实现
	  	   扩展欧几里得总结
	  	   linux指令（一）——常用
	  	   ext4文件系统由文件的inode号定位其inode Table
	  	   简易版主题切换功能的实现
	  	   python的virtualenv环境与使用
	  	   基于Netty5的RPC架构笔记4之案例讲解
	  	   mysql基本查询语句review-2
	  	   React初学体验