-1

I want a single plot with the timestamp on the x-axis and the count on the y-axis, in which all points belonging to the same keyword are connected by one line. Each keyword's line should have its own color and be labeled with the keyword.

Thanks to @Roman I managed to produce the following:

# Load the keyword counts from CSV. Fields are keyword,count,timestamp.
# Example line: "blablabla","123","2018/08/09"
csvdata <- read.csv("c:/mydataset.csv", header=TRUE, sep=",")

# Merge fields into a common data frame for plotting.
# NOTE(review): `time` is not defined anywhere in this snippet; presumably it
# is meant to come from csvdata$timestamp -- TODO confirm.
# NOTE(review): `count` is filled with random Poisson draws (rpois, lambda = 5)
# rather than with csvdata$count.
xy <- data.frame(time, word = as.factor(csvdata$keyword), count = rpois(length(time), lambda = 5))

library(ggplot2)

# Draw the chart: time on x, count on y, one line color per word.
ggplot(xy, aes(x = time, y = count, color = word)) +
  theme_bw() +
  scale_color_brewer(palette = "Set1") +  # choose appropriate palette
  geom_line()

It does create the canvas and the correct legend. However, the points/lines are not drawn. What am I doing wrong?

Output of CSV (redacted for anonymity):

   > dput(csvdata)
structure(list(keyword = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 7L, 2L, 7L, 
2L, 7L, 2L, 7L, 2L, 7L, 2L, 2L, 7L, 2L, 7L, 10L, 10L, 10L, 10L, 
10L, 6L, 8L, 9L, 10L, 6L, 8L, 9L, 6L, 8L, 9L, 6L, 8L, 9L, 3L, 
6L, 3L, 3L, 3L, 3L), .Label = c("word word durch moreWords", 
"word word mit moreWords", "word word via otherWord", 
"word word moreWords", "word word otherWord", 
"word word otherWord", "word word von moreWords", 
"otherWord moreWords", "otherWord otherWord", 
"otherWord otherWord "), class = "factor"), count = c(3L, 
2L, 3L, 1L, 6L, 6L, 2L, 3L, 2L, 1L, 2L, 6L, 3L, 2L, 2L, 2L, 2L, 
2L, 2L, 3L, 2L, 6L, 1L, 4L, 5L, 5L, 7L, 3L, 5L, 4L, 4L, 3L, 3L, 
7L, 5L, 7L, 4L, 5L, 2L, 3L, 2L, 3L, 6L, 8L, 11L, 9L, 10L, 1L, 
4L, 1L, 3L, 2L, 5L, 2L, 2L, 2L, 2L, 2L, 3L, 8L, 3L, 2L, 3L, 3L, 
2L, 3L, 3L, 6L, 5L, 2L, 13L, 3L, 6L, 2L, 9L, 5L, 14L, 8L, 6L, 
4L, 1L, 7L, 4L, 1L, 7L, 9L, 2L, 4L, 2L, 2L, 3L, 2L, 2L, 7L, 4L, 
6L, 3L, 1L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 6L, 2L, 1L, 4L, 6L, 5L, 
6L, 3L, 3L, 6L, 1L, 5L, 2L, 1L, 1L, 2L), timestamp = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 
81L, 82L, 83L, 83L, 84L, 84L, 85L, 85L, 86L, 86L, 87L, 87L, 88L, 
89L, 89L, 90L, 90L, 90L, 91L, 92L, 93L, 94L, 95L, 95L, 95L, 95L, 
96L, 96L, 96L, 97L, 97L, 97L, 98L, 98L, 98L, 99L, 99L, 100L, 
101L, 102L, 103L), .Label = c("2018/08/09", "2018/08/10", "2018/08/11", 
"2018/08/12", "2018/08/13", "2018/08/14", "2018/08/15", "2018/08/16", 
"2018/08/17", "2018/08/18", "2018/08/19", "2018/08/22", "2018/08/23", 
"2018/08/24", "2018/08/25", "2018/08/26", "2018/08/29", "2018/08/30", 
"2018/08/31", "2018/09/01", "2018/09/03", "2018/09/06", "2018/09/13", 
"2018/09/14", "2018/09/15", "2018/09/16", "2018/09/17", "2018/09/18", 
"2018/09/19", "2018/09/20", "2018/09/21", "2018/09/22", "2018/09/23", 
"2018/09/24", "2018/09/25", "2018/09/26", "2018/09/27", "2018/09/28", 
"2018/09/29", "2018/09/30", "2018/10/01", "2018/10/02", "2018/10/03", 
"2018/10/04", "2018/10/05", "2018/10/06", "2018/10/07", "2018/10/09", 
"2018/10/10", "2018/10/11", "2018/10/12", "2018/10/13", "2018/10/14", 
"2018/10/15", "2018/10/16", "2018/10/17", "2018/10/18", "2018/10/19", 
"2018/10/25", "2018/10/28", "2018/10/29", "2018/10/30", "2018/10/31", 
"2018/11/01", "2018/11/02", "2018/11/03", "2018/11/04", "2018/11/05", 
"2018/11/06", "2018/11/07", "2018/11/09", "2018/11/10", "2018/11/11", 
"2018/11/12", "2018/11/13", "2018/11/14", "2018/11/15", "2018/11/16", 
"2018/11/17", "2018/11/18", "2018/11/19", "2018/11/20", "2018/11/21", 
"2018/11/22", "2018/11/23", "2018/11/24", "2018/11/25", "2018/11/26", 
"2018/11/27", "2018/11/28", "2018/11/29", "2018/12/02", "2018/12/04", 
"2018/12/05", "2018/12/06", "2018/12/07", "2018/12/08", "2018/12/09", 
"2018/12/12", "2018/12/13", "2018/12/14", "2018/12/15", "2018/12/16"
), class = "factor")), .Names = c("keyword", "count", "timestamp"
), class = "data.frame", row.names = c(NA, -121L))
camille
  • 16,432
  • 18
  • 38
  • 60
to_the_nth
  • 61
  • 11
  • could you share your data? E.g. using dput(csvdata) – P1storius Dec 17 '18 at 11:22
  • Added a sample. – to_the_nth Dec 17 '18 at 11:26
  • Your data sample is malformed. Also, how is `time` defined? – Roland Dec 17 '18 at 11:31
  • Updated the data. Now it's working. Sorry. Time is defined as the day in the form YYYY/MM/DD. Hours and minutes are not part of it. – to_the_nth Dec 17 '18 at 11:46
  • 1
    Of the 3 column names you reference in `ggplot`, only 1 is actually in your sample data. After matching the column names and plotting, you should have gotten this warning: "geom_path: Each group consists of only one observation. Do you need to adjust the group aesthetic?" That should point out to you that your "timestamp" is actually a factor. `geom_line` will expect continuous values on the x-axis – camille Dec 17 '18 at 16:18
  • There are many SO posts on this type of question. Here are two: https://stackoverflow.com/q/10357768/5325862 https://stackoverflow.com/q/41850189/5325862 At the very least, make sure your "timestamps" are treated as actual `Date` objects and your lines are grouped properly – camille Dec 17 '18 at 16:26

1 Answer

0

I am unable to reproduce using the data you provided. It seems to be malformed. With some dummy data I am able to produce the plot you describe

library(ggplot2)

# Build a small dummy data set: three keywords ("a", "b", "c"), each with two
# dated observations. as.Date() turns the "YYYY/MM/DD" strings into Date
# values, so the x-axis is continuous rather than categorical.
xy <- data.frame(
  word = as.factor(c("a", "a", "b", "b", "c", "c")),
  time = as.Date(c(
    "2018/08/09", "2018/08/06", "2018/08/07",
    "2018/08/08", "2018/08/05", "2018/08/03"
  )),
  count = c(123, 241, 191, 301, 199, 221)
)

# Plot count against time with one colored line per keyword.
ggplot(xy, aes(x = time, y = count, color = word)) +
  geom_line() +
  scale_color_brewer(palette = "Set1") + # choose appropriate palette
  theme_bw()

Maybe you can check whether your data frame xy looks the way you expect, and whether its column names are identical to the column names you specify in the ggplot() call.

P1storius
  • 917
  • 5
  • 12