I've been working with data.table and stringi more recently, so I thought I'd post these solutions. They are similar to the dplyr solution but may give a nice speed boost on larger data sets.
# Sample data: one free-text comment per row, each paired with a date.
dat <- data.frame(
  comments = c(
    "i want to hear that",
    "lets get started",
    "i want to get started"
  ),
  date = as.Date(c("2010-11-01", "2008-03-25", "2007-03-14")),
  # Keep `comments` as character on R < 4.0, where the default was TRUE.
  stringsAsFactors = FALSE
)
# data.table supplies the DT[i, j, by] syntax used for the aggregations;
# stringi supplies stri_extract_all_words() for splitting text into words.
library(data.table)
library(stringi)

# Turn `dat` into a data.table in place so the queries below can use
# data.table's `[` semantics.
setDT(dat)
# Overall word frequencies across all comments.
#   1. Split every comment into words and flatten them into a single `word`
#      column.
#   2. Drop NA entries: stri_extract_all_words() yields NA for a comment that
#      contains no words (e.g. an empty string) or is itself missing; left
#      in, those would be tallied as a spurious NA "word".
#   3. Count the rows per word (.N) and sort the result alphabetically.
dat[, list(word = unlist(stri_extract_all_words(comments)))][
  !is.na(word), list(freq = .N), by = "word"][order(word)]
##       word freq
## 1:     get    2
## 2:    hear    1
## 3:       i    2
## 4:    lets    1
## 5: started    2
## 6:    that    1
## 7:      to    2
## 8:    want    2
# Word frequencies broken down by date.
#   1. Within each date, split the comments into words and flatten them into
#      a `word` column (the grouping column `date` is carried along).
#   2. Drop NA entries: stri_extract_all_words() yields NA for a comment that
#      contains no words (e.g. an empty string) or is itself missing; left
#      in, each such date would gain a spurious NA "word" row.
#   3. Count the rows per (date, word) pair (.N) and sort by date, then word.
dat[, list(word = unlist(stri_extract_all_words(comments))), by = "date"][
  !is.na(word), list(freq = .N), by = c("date", "word")][order(date, word)]
##           date    word freq
##  1: 2007-03-14     get    1
##  2: 2007-03-14       i    1
##  3: 2007-03-14 started    1
##  4: 2007-03-14      to    1
##  5: 2007-03-14    want    1
##  6: 2008-03-25     get    1
##  7: 2008-03-25    lets    1
##  8: 2008-03-25 started    1
##  9: 2010-11-01    hear    1
## 10: 2010-11-01       i    1
## 11: 2010-11-01    that    1
## 12: 2010-11-01      to    1
## 13: 2010-11-01    want    1