Python做词云(WordCloud)

来源:互联网 发布:js为标签添加属性 编辑:程序博客网 时间:2024/05/24 20:07

一、jieba分词后直接用Wordcloud做词云

# Script 1: segment a novel with jieba and feed the space-joined tokens
# straight to WordCloud, which does its own frequency counting.
import jieba
from jieba.analyse import extract_tags
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Input: the novel "择天记" stored as a UTF-8 text file on the desktop.
txtname = '择天记'
max_words = 50
path = 'C:/Users/Administrator/Desktop'
txtfile = path + '/' + txtname + '.txt'
# Use a context manager so the file handle is closed after reading.
with open(txtfile, 'r', encoding='utf-8') as f:
    content = f.read()  # full text of the novel

# Load a user dictionary so domain-specific words are kept as single tokens.
# load_userdict accepts a file-like object or the path of a custom dictionary.
dictfile = path + '/' + 'zetianji.txt'
jieba.load_userdict(dictfile)

# Segment the text; WordCloud.generate expects whitespace-separated words.
cut_text = " ".join(jieba.cut(content))

# A font containing Chinese glyphs is needed to render the words.
font_file = r'C:/Windows/Fonts/STKAITI.TTF'

# Word cloud settings.
# NOTE: the original listing also mentioned `mask = back_coloring`, but
# `back_coloring` is never defined (the image-loading import was commented
# out), so no mask is passed here. To shape the cloud, load an image as an
# array and pass it as `mask=`.
wc = WordCloud(
    font_path=font_file,       # font used to draw the words
    background_color="black",  # background colour
    max_words=max_words,       # maximum number of words shown
    max_font_size=100,         # largest font size
    random_state=42,           # fixed seed for a reproducible layout
)

word_cloud = wc.generate(cut_text)  # build the word cloud
word_cloud.to_file(path + '/' + 'cloud1.jpg')  # save the image

二、使用jieba.analyse.extract_tags统计词频后做词云

extract_tags 是基于 TF-IDF 算法的关键词抽取函数。此处只有一个文件,先用它找出关键词,再统计这些关键词在分词结果中的出现次数作为词频。
# Script 2: pick the top keywords with jieba's TF-IDF based extract_tags,
# count how often each keyword occurs in the segmented text, and build the
# word cloud from those explicit frequencies.
from collections import Counter

import jieba
from jieba.analyse import extract_tags
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Input: the novel "择天记" stored as a UTF-8 text file on the desktop.
txtname = '择天记'
max_words = 50
path = 'C:/Users/Administrator/Desktop'
txtfile = path + '/' + txtname + '.txt'
# Use a context manager so the file handle is closed after reading.
with open(txtfile, 'r', encoding='utf-8') as f:
    content = f.read()  # full text of the novel

# Load a user dictionary so domain-specific words are kept as single tokens.
# load_userdict accepts a file-like object or the path of a custom dictionary.
dictfile = path + '/' + 'zetianji.txt'
jieba.load_userdict(dictfile)

# Find the keywords of the text according to their TF-IDF weight.
tags = extract_tags(content, topK=max_words)

# Count keyword occurrences. A single Counter pass over the token list
# replaces one full list scan per keyword; a keyword that is absent from
# the token list still gets a count of 0, as with list.count().
word_list = jieba.lcut(content)
token_counts = Counter(word_list)
word_freq_dict = {tag: token_counts[tag] for tag in tags}

# A font containing Chinese glyphs is needed to render the words.
font_file = r'C:/Windows/Fonts/STKAITI.TTF'

# Word cloud settings.
# NOTE: the original listing also mentioned `mask = back_coloring`, but
# `back_coloring` is never defined (the image-loading import was commented
# out), so no mask is passed here. To shape the cloud, load an image as an
# array and pass it as `mask=`.
wc = WordCloud(
    font_path=font_file,       # font used to draw the words
    background_color="black",  # background colour
    max_words=max_words,       # maximum number of words shown
    max_font_size=100,         # largest font size
    random_state=42,           # fixed seed for a reproducible layout
)
wc.generate_from_frequencies(word_freq_dict)

# Display the result, then save it to disk.
plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file(path + '/' + 'pjl_cloud4.jpg')  # save the image