R语言:词云图

来源:互联网 发布:荧光文字配图美图软件 编辑:程序博客网 时间:2024/04/30 02:49

这是当时琢磨文本挖掘时用到的一个小技巧,贴出来分享一下。

# Word cloud from a column of Chinese text:
#   1. read the CSV and strip ASCII letters, digits and underscores,
#   2. segment each entry into words with Rwordseg,
#   3. drop stop words,
#   4. plot the 250 most frequent words with wordcloud.

library(Rwordseg)   # Chinese word segmentation (segmentCN)
library(wordcloud)  # wordcloud(); brewer.pal() is expected to come along with
                    # it via RColorBrewer -- NOTE(review): confirm it is attached

# Import the data.
data <- read.csv("C:\\Users\\hormy\\Documents\\咨询数据.csv",
                 stringsAsFactors = FALSE)

# Remove digits, ASCII letters and underscores from the text column.
data$内容 <- gsub("[a-z0-9A-Z_]", "", data$内容)

# Segment the text with Rwordseg. Per the original author's note, a custom
# dictionary file in the working directory is added manually beforehand.
words <- segmentCN(data$内容)

# Read the stop-word list stopwordsCN.txt; the file should be UTF-8 encoded.
stopwordsCN <- as.character(readLines("stopwordsCN.txt"))
stopwordsCN <- enc2utf8(stopwordsCN)
# Keep only entries whose encoding is explicitly marked (i.e. not "unknown").
stopwordsCN <- stopwordsCN[Encoding(stopwordsCN) != "unknown"]

#' Remove stop words from a vector of tokens.
#'
#' @param x Character vector of tokens (one segmented document).
#' @param stopwords Character vector of words to discard.
#' @return Character vector holding the elements of `x` that are not in
#'   `stopwords`, in their original order. `NA` tokens are dropped. Returns
#'   `character(0)` when nothing is left or `x` is empty.
removeStopWords <- function(x, stopwords) {
  # Vectorized membership test instead of growing a result inside a loop.
  keep <- !is.na(x) & !(x %in% stopwords)
  # c(character(0), ...) guarantees a character result even for empty/NULL x.
  c(character(0), x[keep])
}

# Drop stop words from every segmented document.
words <- lapply(words, removeStopWords, stopwordsCN)

# Word frequencies over all documents, sorted in ascending order.
wordsnum <- table(unlist(words))
wordsnum <- sort(wordsnum)

# Keep the 250 most frequent words (the tail of the ascending sort).
wordsnum <- tail(wordsnum, 250)

# Draw the word cloud.
# NOTE(review): "myFont3" is not registered anywhere in the visible code; it
# presumably has to be set up beforehand (e.g. via windowsFonts()) -- confirm.
wordcloud(
  names(wordsnum),
  as.vector(wordsnum),
  random.order = FALSE,
  random.color = FALSE,
  colors = brewer.pal(8, "Dark2"),
  family = "myFont3"
)


0 0
原创粉丝点击