# Build a word cloud from the Chinese .txt files in one directory:
# read the files into a tm corpus, segment the text into words, count term
# frequencies across all documents, and plot the most frequent terms.

library(tm)
library(tmcn)
library(rJava)
library(Rwordseg)
library(wordcloud)

# Single place to change the input location.
corpus_dir <- "C:/Users/2015/Desktop/test"

# Kept from the original script so that anything run afterwards still sees
# this directory as the working directory; the corpus itself is read through
# the absolute path below and does not depend on it.
setwd(corpus_dir)

# `pattern` is a regular expression (not a shell glob), so anchor on the
# ".txt" suffix. The file encoding is a property of the source; the
# `language` entry of readerControl is a language tag, not an encoding.
yuliaoku <- Corpus(
  DirSource(corpus_dir, encoding = "UTF-8", pattern = "\\.txt$"),
  readerControl = list(language = "zh")
)
yuliaoku <- tm_map(yuliaoku, stripWhitespace)

# Chinese text has no spaces between words: segment it first.
# returnType = "tm" is forwarded to segmentCN by tm_map/content_transformer.
yuliaoku <- tm_map(yuliaoku, content_transformer(segmentCN), returnType = "tm")

# Keep terms of 1 to 5 characters and drop the Chinese stop words from tmcn.
control <- list(wordLengths = c(1, 5), stopwords = stopwordsCN())
mt <- TermDocumentMatrix(yuliaoku, control = control)

# Total frequency of each term over all documents, most frequent first.
vmt <- as.matrix(mt)
val <- sort(rowSums(vmt), decreasing = TRUE)
df <- data.frame(word = names(val), freq = val)

# Plot terms occurring at least 3 times, most frequent terms placed first,
# with one rainbow colour generated per term of the matrix.
wordcloud(
  words = df$word,
  freq = df$freq,
  min.freq = 3,
  random.order = FALSE,
  colors = rainbow(nrow(vmt))
)
附件为测试文本


雷达卡





京公网安备 11010802022788号







