wordcloud词云使用

来源:互联网 发布:广联达软件下载免费 编辑:程序博客网 时间:2024/06/01 10:26
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 16 13:53:52 2017

@author: gzs10227

Build a word cloud from a forum-post dump (``ngc.txt``).

Steps: strip BBCode-style markup and punctuation from the text, segment it
with jieba, count the words, lay the counts out inside the shape of a mask
image (``test.jpg``), display the result and save it as ``tt.png``.
"""
from collections import Counter
from os import path
import re

import jieba
import jieba.analyse
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; matplotlib's reader (already
    # a dependency of this script) is used as a drop-in replacement.
    imread = plt.imread

# Use a font that has CJK glyphs for anything matplotlib itself renders.
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Markup to delete from the raw text, applied strictly in this order: the
# specific paired tags go first, the catch-all "[...]" pattern goes last.
MARKUP_PATTERNS = (
    r'\[b\].*?\[\/b\]',
    r'\[quote\].*?\[\/pid\]',
    r'\[img\].*?\[\/img\]',
    r'\[align=center\].*?\[list\]',
    r'\[quote\].*?\[\/quote\]',
    r'\[url\].*?\[\/url\]',
    r'\[s:.*?\]',
    r'\[.*?\]',
)

d = path.dirname(u'E:/廖庆豪/2017/201708/文本分析/')

# Read the corpus; the context manager closes the handle deterministically.
# NOTE(review): no encoding is specified, so the platform default applies --
# confirm that it matches how ngc.txt was saved.
with open(path.join(d, 'ngc.txt')) as source_file:
    text = source_file.read()

# read the mask / color image
# taken from http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010
# This image provides both the cloud's shape (mask) and, later, its colours.
alice_coloring = imread(path.join(d, "test.jpg"))

# set.add() returns None, so the stop-word set must be built before it is
# passed in; copying also avoids mutating the library-wide STOPWORDS set.
# Per the wordcloud docs, stop words only affect generation from raw text.
stopwords = set(STOPWORDS)
stopwords.add("said")

wc = WordCloud(
    background_color="black",  # background colour
    max_words=2000,  # maximum number of words shown in the cloud
    font_path='E:/Anaconda2/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/Microsoft YaHei.ttf',
    mask=alice_coloring,  # shape the cloud after the image
    stopwords=stopwords,
    max_font_size=40,  # largest font size
    random_state=42,
)

# Strip the markup, then all tabs/newlines/spaces, then ASCII punctuation and
# digits.  Inside the last character class, "+-\/" is a range that covers
# the characters + , - . /
for markup in MARKUP_PATTERNS:
    text = re.sub(markup, '', text)
text = text.replace('\t', '').replace('\n', '').replace(' ', '')
text = re.sub(r'[,:.!=?#%()#+-\/0-9]+', '', text)

# Segment with jieba and keep only tokens longer than one character.
ci = [i for i in jieba.cut(text) if len(i) > 1]
data = dict(Counter(ci))
# The same counts as (word, count) pairs, e.g. [('词a', 100), ('词b', 90)].
ci_list = list(data.items())

# Build the cloud from the pre-computed frequencies.  The earlier extra
# wc.generate(text) call was dropped: its layout was overwritten by this call
# anyway, and the whitespace-stripped Chinese text cannot be split into words
# by WordCloud's own tokenizer.
wc.generate_from_frequencies(data)

# Derive per-word colours from the mask image.
image_colors = ImageColorGenerator(alice_coloring)

# Figure 1: the cloud with its default colours.
plt.imshow(wc)
plt.axis("off")

# Figure 2: the cloud recoloured from the image.
# we could also give color_func=image_colors directly in the constructor
plt.figure()
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Figure 3: the mask image itself.
plt.figure()
plt.imshow(alice_coloring, cmap=plt.cm.gray)
plt.axis("off")
plt.show()

# Save the word cloud to disk.
wc.to_file(path.join(d, "tt.png"))

原创粉丝点击