str_count() from the stringr package can help with this (and with many other string-based tasks; see the chapter on strings in R for Data Science, R4DS).
library(stringr)  # provides str_count(); the package is "stringr", not "string"

# Create a reproducible example: one post per row
dat <- data.frame(
  Post = c(
    "This is a sample post without any target words",
    "Whilst this is green!",
    "And this is eco-friendly",
    "This is green AND eco-friendly!"
  )
)

# Lexicon of candidate words; rows with Polarity == 1 are the ones counted below
lexicon <- data.frame(
  Word = c("green", "eco-friendly", "neutral"),
  Polarity = c(1, 1, 0)
)

# Extract relevant words from lexicon
green_words <- lexicon$Word[lexicon$Polarity == 1]

# Combine the words into a single regular expression of alternatives ("a|b").
# The words are interpreted as regex, so a lexicon entry containing
# metacharacters (e.g. "." or "+") would need escaping first.
green_pattern <- paste(green_words, collapse = "|")

# Create new variable: number of pattern matches found in each post
dat$n_green_words <- str_count(dat$Post, green_pattern)
dat
Output:
#> Post n_green_words
#> 1 This is a sample post without any target words 0
#> 2 Whilst this is green! 1
#> 3 And this is eco-friendly 1
#> 4 This is green AND eco-friendly! 2
Created on 2022-07-15 by the reprex package (v2.0.1)