I have a graph with 3 lines and the only way i could think of to do it was this:
corp %>%
mutate(month_year = format(as.POSIXct(corp$review.date,
format = "%d-%b-%y"
), format = "%y")) %>%
inner_join(get_sentiments("bing"), by = "word") %>%
group_by(month_year) %>%
count(month_year, sentiment) %>%
spread(sentiment, n) %>%
mutate(ratio = positive / negative) %>%
ggplot(aes(x = month_year, y = ratio)) +
geom_hline(yintercept = 1, color = "white", size = 2) +
geom_line(aes(x = month_year, y = ratio, group = 1, color = "purple")) +
geom_line(
data = corp %>%
mutate(month_year = format(as.POSIXct(corp$review.date,
format = "%d-%b-%y"
), format = "%y")) %>%
filter(verified.purchase == FALSE) %>%
inner_join(get_sentiments("bing"), by = "word") %>%
group_by(month_year) %>%
count(month_year, sentiment) %>%
spread(sentiment, n) %>%
mutate(ratio = positive / negative),
aes(month_year, ratio, group = 1, color = "brown")
) +
geom_line(
data = corp %>%
mutate(month_year = format(as.POSIXct(corp$review.date,
format = "%d-%b-%y"
), format = "%y")) %>%
filter(verified.purchase == TRUE) %>%
inner_join(get_sentiments("bing"), by = "word") %>%
group_by(month_year) %>%
count(month_year, sentiment) %>%
spread(sentiment, n) %>%
mutate(ratio = positive / negative),
aes(month_year, ratio, group = 1, color = "red")
) +
ggtitle("Sentiment Analysis of Reviews Over Time")
I am trying to label the lines with "all", "verified", "Unverified" respectively. However, I can't seem to get the labels set up. right now there is a legend that just says the names of the colors. also, is there a better way to put multiple lines on a graph?
here is the dput:
structure(list(review.ID = c("R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK"
), review.rating = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), review.date = c("4-Jun-18",
"4-Jun-18", "4-Jun-19", "4-Jun-20", "4-Jun-21", "4-Jun-17", "4-Jun-16",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18"), verified.purchase = c(FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE), word = c("initial", "box", "review", "headphones", "pro",
"audio", "experience", "excellent", "home", "studio", "personal",
"listening", "headphones", "dj", "quality", "talking", "weekend",
"warrior", "occasional", "party", "dj", "yeah", "professional",
"commercial", "dj", "subjects", "lot", "physical", "punishment",
"absolutely", "real", "headphones", "positive", "star", "reasons",
"sound", "quality", "price", "hard", "pressed", "real", "notable",
"difference", "dollar", "studio", "grade", "sennheiser", "bose",
"headphone", "driver")), row.names = c(NA, 50L), class = "data.frame")