matplotlib与pyecharts:词云可视化

来源:互联网 发布:修改数据库创建时间 编辑:程序博客网 时间:2024/06/05 16:04

随便下载一个 txt 小说,保存为 /home/soft/wordcloud.txt(下面的代码按 GBK 编码读取该文件)

代码如下:

# coding: utf-8
"""Render a word cloud from a Chinese text file.

The text is segmented with jieba, stop words are dropped, and the remaining
words are drawn by WordCloud using a mask image. The picture is written to
``cloud.jpg`` and then displayed with matplotlib.
"""
import io
from collections import Counter

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; matplotlib is already a
    # dependency of this script and returns the same kind of array for JPEGs.
    from matplotlib.pyplot import imread

# Extra words jieba should treat as single tokens.
my_word_list = ['楚天箫']


def add_word(list):  # parameter name kept for compatibility (shadows builtin)
    """Register every entry of *list* as a custom word in jieba's dictionary."""
    for items in list:
        jieba.add_word(items)


add_word(my_word_list)
# Alternative: load a whole user dictionary file instead.
# jieba.load_userdict("/home/soft/new.txt")

# Stop words. io.open decodes while reading on both Python 2 and 3, and the
# ``with`` block closes the file.
# Inline alternative to the stop-word file:
# stopwords = [u'自己', u'他们', u'我们', u'不是', u'只是']
with io.open('/home/soft/stop.txt', 'r', encoding='utf-8') as stop_file:
    f_stop = stop_file.read()
# A set of whole words: testing ``word in <raw file text>`` would be a
# substring match and would also drop any token that merely occurs inside a
# longer stop word.
# NOTE(review): assumes the stop words are separated by whitespace/newlines.
stopwords = set(f_stop.split())

# The text to visualise; the file is read as GBK.
with io.open('/home/soft/wordcloud.txt', 'r', encoding='gbk') as text_file:
    f = text_file.read()


def jiebaclearText(text):
    """Segment *text* with jieba and return the kept words joined by spaces.

    Tokens that are whitespace-only or contained in the module-level
    ``stopwords`` set are dropped.
    """
    seg_list = jieba.cut(text, cut_all=False)
    return ' '.join(
        myword for myword in seg_list
        if myword.strip() and myword not in stopwords
    )


cut_text = jiebaclearText(f)
# Debug: preview the segmented text.
# print(cut_text[0:200])

# Read the background image used as the mask.
color_mask = imread("/home/soft/star.jpg")

# NOTE(review): per the wordcloud docs, width/height are ignored when a mask
# is supplied (the mask's shape is used) - confirm before relying on them.
cloud = WordCloud(
    # Font for rendering the words; without it the Chinese text comes out
    # garbled.
    font_path='/usr/share/fonts/wqy-zenhei/wqy-zenhei.ttc',
    background_color="white",
    width=1000,
    height=860,
    margin=2,  # spacing kept around each word
    mask=color_mask,
    max_words=100,
    max_font_size=100,
)

# Build the word cloud and save it.
word_cloud = cloud.generate(cut_text)
word_cloud.to_file("cloud.jpg")

# Debug: the 100 most frequent words.
# c = Counter(cut_text.split()).most_common(100)
# print(c)

plt.imshow(word_cloud)
plt.axis("off")
plt.show()
运行结果如下



原创粉丝点击