# Build and clean a tm corpus from `textdataframe` while keeping the original
# document ids attached to every document.
#
# DataframeSource() requires the id column to be named `doc_id` and the text
# column to be named `text`, so rename the original columns first.
# NOTE(review): tm also expects these to be the first and second columns;
# confirm the column order of `textdataframe`.
textdataframe <- textdataframe %>%
  rename(doc_id = orig_id, text = orig.narr)
corpus <- Corpus(DataframeSource(textdataframe))

# Do NOT run tm_map(corpus, PlainTextDocument) here: it rebuilds every
# document with default metadata, which throws away the doc_id values that
# DataframeSource() attached.

# Lower-case the text. tolower() is a base function that returns plain
# character data, so wrap it in content_transformer() to modify only the
# document content and leave the metadata (including the ids) intact.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus[[1]][1]

# Remove punctuation.
corpus <- tm_map(corpus, removePunctuation)
corpus[[1]][1]

# Remove stopwords (English defaults plus the domain word "cloth").
corpus <- tm_map(corpus, removeWords, c("cloth", stopwords("english")))
corpus[[1]][1]

# Stemming.
corpus <- tm_map(corpus, stemDocument)
corpus[[1]][1]

# Sanity check: the first document should still carry its original doc_id.
meta(corpus[[1]], "id")
What ends up happening is that I lose the unique IDs that I assigned when setting up the DataframeSource. I would like to assign them once and have them stay attached to each document as I go along with the cleaning and stemming steps.