0

I have a graph with 3 lines and the only way i could think of to do it was this:

 corp %>%
 mutate(month_year = format(as.POSIXct(corp$review.date,
        format = "%d-%b-%y"
    ), format = "%y")) %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    group_by(month_year) %>%
    count(month_year, sentiment) %>%
    spread(sentiment, n) %>%
    mutate(ratio = positive / negative) %>%
    ggplot(aes(x = month_year, y = ratio)) +
    geom_hline(yintercept = 1, color = "white", size = 2) +
    geom_line(aes(x = month_year, y = ratio, group = 1, color = "purple")) +
    geom_line(
        data = corp %>%
            mutate(month_year = format(as.POSIXct(corp$review.date,
                format = "%d-%b-%y"
            ), format = "%y")) %>%
            filter(verified.purchase == FALSE) %>%
            inner_join(get_sentiments("bing"), by = "word") %>%
            group_by(month_year) %>%
            count(month_year, sentiment) %>%
            spread(sentiment, n) %>%
            mutate(ratio = positive / negative),
        aes(month_year, ratio, group = 1, color = "brown")
    ) +
    geom_line(
        data = corp %>%
            mutate(month_year = format(as.POSIXct(corp$review.date,
                format = "%d-%b-%y"
            ), format = "%y")) %>%
            filter(verified.purchase == TRUE) %>%
            inner_join(get_sentiments("bing"), by = "word") %>%
            group_by(month_year) %>%
            count(month_year, sentiment) %>%
            spread(sentiment, n) %>%
            mutate(ratio = positive / negative),
        aes(month_year, ratio, group = 1, color = "red")
    ) + 
    ggtitle("Sentiment Analysis of Reviews Over Time")

I am trying to label the lines with "all", "verified", "Unverified" respectively. However, I can't seem to get the labels set up. right now there is a legend that just says the names of the colors. also, is there a better way to put multiple lines on a graph?

here is the dput:

 structure(list(review.ID = c("R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", 
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK",
"R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK", "R1DQZ2UGUUH5CK"
), review.rating = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), review.date = c("4-Jun-18",
"4-Jun-18", "4-Jun-19", "4-Jun-20", "4-Jun-21", "4-Jun-17", "4-Jun-16",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", 
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18", "4-Jun-18",
"4-Jun-18"), verified.purchase = c(FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE), word = c("initial", "box", "review", "headphones", "pro",
"audio", "experience", "excellent", "home", "studio", "personal",
"listening", "headphones", "dj", "quality", "talking", "weekend", 
"warrior", "occasional", "party", "dj", "yeah", "professional",
"commercial", "dj", "subjects", "lot", "physical", "punishment",
"absolutely", "real", "headphones", "positive", "star", "reasons",
"sound", "quality", "price", "hard", "pressed", "real", "notable",
"difference", "dollar", "studio", "grade", "sennheiser", "bose",
"headphone", "driver")), row.names = c(NA, 50L), class = "data.frame")
sourlemonaid
  • 504
  • 2
  • 6
  • 19
  • 2
    Your code hits an error with the supplied dput - only one of the words is present in the `get_sentiments` data frame, so there is only one row left after the inner join. – Allan Cameron Apr 09 '22 at 14:14
  • @AllanCameron Is right, I cannot plot a graph with the posted data. Is `corp` bigger? If so, can you edit the question with the output of `dput(head(corp, 30))` – Rui Barradas Apr 09 '22 at 14:30
  • @RuiBarradas posting it now, But i think that might still not be enough. in total the code is about 6k lines – sourlemonaid Apr 10 '22 at 00:24
  • Please post a more minimal example that reproduces your problem. – jdobres Apr 10 '22 at 01:23
  • As it is not quite clear what you mean with label, I've added two threads which should give solutions for both cases- direct labels (in the plot), and labels in the legend (which I believe is what you're looking for). The idea is to make correct use of the aesthetic (use the variable in aes() – tjebo Apr 10 '22 at 11:39

0 Answers0