library(tm)
library(NLP)
library(twitteR)
for (i in 11:15) { cat(paste("[[", i, "]] ", sep=""))+ writeLines(strwrap(rdmTweets[]$getText(), width=73))}
df=do.call("rbind",lapply(rdmTweets,as.data.frame))
dim(df)
library(tm)
myCorpus=Corpus(VectorSource(df$text))
myCorpus
myCorpus=tm_map(myCorpus,tolower)
myCorpus=tm_map(myCorpus,removePunctuation)
myCorpus=tm_map(myCorpus,removeNumbers)
removeURL <- function(x) gsub("http[[:alnum:]]*", "", x)
myCorpus <- tm_map(myCorpus, removeURL)
myStopwords <- c(stopwords('english'), "available", "via")
myStopwords <- setdiff(myStopwords, c("r", "big"))
myCorpus=tm_map(myCorpus,removeWords,myStopwords)
myCorpus[1]
myCorpusCopy=myCorpus
library(SnowballC)
library(RWeka)
myCorpus<-tm_map(myCorpus,stemDocument)
for (i in 11:15) { cat(paste("[[", i, "]] ", sep="")) +writeLines(strwrap(myCorpus[], width=73))}
myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=myCorpusCopy)
myDtm = DocumentTermMatrix(myCorpus, control = list(minWordLength = 3))
Error: inherits(doc, "TextDocument") is not TRUE
进行到英文词向量矩阵的时候,报错如下,求各位大神指教一下,感激不尽!


雷达卡


京公网安备 11010802022788号







