I am trying to get the frequency of words used in each post, and add them as columns to the training data, the code below is running appropriately for 1 word, for the 2nd word it throws this error.
Function to fetch frequency of a particular word in 1000 posts
word_frequency <- function(w){
for(i in 2:1000){
review_text <- paste(Train$Post[i:i], collapse=" ")
review_source <- VectorSource(review_text)
corpus <- Corpus(review_source)
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
dtm <- DocumentTermMatrix(corpus)
dtm2 <- as.matrix(dtm)
frequency <- colSums(dtm2)
frequency <- frequency[names(frequency) == w]
frequency <- as.list(frequency)
freq<-rbind(freq, frequency)
freq.withNA <- sapply(freq, function(x) ifelse(x == "NULL", NA, x))
}
return(freq)
}
Train <- Training[1:1000,]
Loping over all the words in my wordlist and cbind-ing frequency to the base data frame.
for (w in wordlist) {
freq <- as.integer()
new <- word_frequency(w)
Train <- cbind(Train, new)
print(paste("Completed word ", w, sep=""))
}