I want to create a timeline plot showing four different data series. The first data entry is a single point, though, from which all four series should originate, but they don't. How can I tell all of them to start at t0 instead of t1 without manually altering my original data frame?
This is a mixed problem: either the data frame has to be manipulated, or ggplot has to be told what is wanted.
I measured a substance that was then split across four different reactors; from the first day onwards, each reactor was measured separately. My data frame (plot data frame = pldf) has one entry for t0 and four entries for each time point from then onwards. y is the measured value, and shape distinguishes the four lines.
The series are distinguished by "Versuchsbezeichnung" (experiment) and "Reaktorbezeichnung" (reactor). The reactor label is "Alle" for t0 and, from then onwards, "T30.1", "T30.2", "T50.1" or "T50.2".
The df has a column for the experiment (2 levels) and one for the reactors (5 levels: "Alle" for t0 and "T30.1", "T30.2", "T50.1" and "T50.2" for the rest). About 20 rows are associated with every day (t) and reactor.
Here is part of pldf, produced with dput() and reduced to t0, t1 and t2 (it still looks messy, sorry):
structure(list(X = c(599L, 619L, 639L, 659L, 679L, 699L, 719L,
738L, 757L, 776L, 796L, 816L, 836L, 856L, 875L, 894L, 914L, 934L,
954L, 974L, 994L, 1013L, 1032L, 1052L, 1072L, 1092L, 1112L, 1132L,
1151L, 1171L, 1191L, 1211L, 1231L, 1292L, 1312L, 1332L, 1352L,
1372L, 1393L, 1414L, 1435L, 1456L, 1477L, 1498L, 1519L, 1540L,
1560L, 1581L, 1602L, 1623L, 1788L, 1809L, 1830L, 1851L, 1872L,
1893L, 1914L, 1935L, 1956L, 1977L, 1998L, 2019L, 2040L, 2061L,
2082L, 2103L, 2124L, 2145L, 2166L, 2187L, 2208L, 2228L, 2249L,
2270L, 2291L, 2312L, 2333L, 2354L, 2375L, 2396L, 2417L, 2438L
), Datum.Probenahme = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 15L, 15L, 15L, 15L,
16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L,
19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L,
22L, 22L, 1L, 1L, 1L, 1L), .Label = c("01.07.2019", "05.06.2019",
"06.06.2019", "07.06.2019", "08.06.2019", "09.06.2019", "10.06.2019",
"11.06.2019", "12.06.2019", "13.06.2019", "14.06.2019", "15.06.2019",
"16.06.2019", "20.06.2019", "21.06.2019", "22.06.2019", "23.06.2019",
"24.06.2019", "25.06.2019", "26.06.2019", "28.06.2019", "30.06.2019"
), class = "factor"), Uhrzeit.Probenahme = structure(c(50L, 12L,
10L, 11L, 4L, 8L, 8L, 14L, 12L, 12L, 13L, 8L, 12L, 8L, 19L, 12L,
12L, 22L, 15L, 17L, 20L, 24L, 12L, 12L, 31L, 18L, 22L, 23L, 30L,
5L, 9L, 20L, 16L, 7L, 11L, 19L, 21L, 15L, 20L, 30L, 26L, 1L,
2L, 6L, 3L, 38L, 35L, 39L, 43L, 46L, 20L, 23L, 28L, 30L, 20L,
22L, 25L, 28L, 28L, 30L, 33L, 35L, 43L, 36L, 27L, 32L, 29L, 34L,
37L, 42L, 35L, 45L, 41L, 48L, 49L, 47L, 44L, 40L, 35L, 35L, 35L,
35L), .Label = c("09:23", "09:29", "09:37", "09:40", "09:42",
"09:43", "09:47", "09:50", "09:51", "09:52", "09:55", "10:00",
"10:05", "10:06", "10:10", "10:11", "10:12", "10:15", "10:16",
"10:20", "10:24", "10:25", "10:30", "10:34", "10:35", "10:36",
"10:37", "10:40", "10:43", "10:45", "10:50", "10:51", "10:55",
"10:57", "11:00", "11:01", "11:03", "11:04", "11:05", "11:06",
"11:07", "11:09", "11:10", "11:13", "11:14", "11:15", "11:20",
"11:21", "11:25", "12:10"), class = "factor"), Versuchsbezeichnung = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("MUN", "WAS"), class = "factor"), Probenzeitpunkt = structure(c(1L,
2L, 5L, 6L, 7L, 8L, 9L, 10L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 2L,
5L, 6L, 7L, 8L, 9L, 10L, 2L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
1L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 11L, 11L, 11L, 11L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L), .Label = c("t0", "t1", "t10", "t11",
"t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"), class = "factor"),
Reaktorbezeichnung = structure(c(1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 3L, 5L, 4L, 2L, 3L, 5L,
4L, 2L, 3L, 5L, 4L, 2L, 3L, 5L, 4L, 1L, 2L, 3L, 4L, 5L, 2L,
3L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L,
2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L, 5L, 2L, 3L, 4L,
5L), .Label = c("Alle", "T30.1", "T30.2", "T50.1", "T50.2"
), class = "factor"), eingestellte_Temperatur = c(NA, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L, 50L, 30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L, 30L,
30L, 50L, 50L, 30L, 30L, 50L, 50L, NA, 30L, 30L, 50L, 50L,
30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L,
30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L,
30L, 30L, 50L, 50L, 30L, 30L, 50L, 50L), Parameter_lang = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = "substance", class = "factor"),
mittelwert = c(195.510146467184, 590.04453797041, 650.046710519628,
805.023970909082, 680.056275644273, 605.038628373497, 655.013494757377,
510.014882632685, 535.033292951283, 670.029819108804, 675.023180234177,
635.030532865876, 705.020509801388, 700.016350845922, 630.031948130196,
575.020958779739, 515.034675161434, 515.010423214009, 484.525917325803,
525.034977749943, 535.030449153043, 545.005715243447, 540.041962411875,
545.046361460258, 461.558379907788, 510.029446766068, 600.036907210817,
520.031560570813, 515.023911897655, 645.052575729496, 705.012975914544,
550.022784174821, 560.017740283, 630.066260242174, 675.022704031091,
545.053372419458, 540.016063199975, 700.028692986007, 604.992609005687,
545.038604346887, 500.023450688633, 685.031134646166, 765.044624693392,
575.045832850911, 545.018905606243, 9.30989225859789, 14.6996919068786,
20.2006993402116, 11.6396196601576, 9.81990236026879, 29.7991367947917,
29.1989874363788, 4.11997199560799, 4.44042193773157, 32.9990403063925,
57.5020579022581, 21.0483199248758, 22.0038515217912, 90.4030580857021,
89.000336284423, 8.34032878685458, 10.3604802171955, 96.1034261675684,
105.004535949448, 8.89931984186634, 9.92024857606832, 101.002678657527,
132.007031226063, 10.7206828322988, 10.7809908792647, 107.003512180122,
101.001589580275, 12.3191266322172, 10.0618514094455, 102.005883436079,
93.29828183675, 8.46966678357295, 5.40058933971296, 45.7032091854773,
49.8010426068819, 7.61983731205152, 6.9002829183213)), class = "data.frame", row.names = c(NA,
-82L))
This is what I am trying to work with:
# Timeline of `mittelwert` over `day`: colour separates the experiment runs
# (Versuchsbezeichnung), shape separates the reactors (Reaktorbezeichnung).
# geom_line() draws one line per group, and by default ggplot2 groups by the
# interaction of all discrete variables mapped in the plot, so the "Alle"
# rows at t0 form a group of their own.
# NOTE(review): `day` is not among the columns of the dput() sample --
# presumably it is derived elsewhere; confirm.
ggplot(
  pldf,
  aes(
    x = day,
    y = mittelwert,
    colour = Versuchsbezeichnung,
    shape = Reaktorbezeichnung
  )
) +
  geom_line() +
  scale_y_continuous(breaks = seq(from = 0, to = 1000, by = 100)) +
  theme_classic()
This results in the current plot, where the first point of each of the 2 experiment runs is not connected to the rest of the data.
A temporary fix for standard plotting was this:
# One data frame per reactor. Each keeps the shared t0 rows ("Alle") together
# with that reactor's own rows, so every series includes the common origin.
reaktor301 <- subset(pldf, Reaktorbezeichnung %in% c("Alle", "T30.1"))
reaktor302 <- subset(pldf, Reaktorbezeichnung %in% c("Alle", "T30.2"))
reaktor501 <- subset(pldf, Reaktorbezeichnung %in% c("Alle", "T50.1"))
reaktor502 <- subset(pldf, Reaktorbezeichnung %in% c("Alle", "T50.2"))
But how can that work in ggplot? That is, how can I tell ggplot to draw data from four different data frames?
Previously I had not included the df. I am sorry that it looks messy; I don't know how to include it in a prettier way, since I am new to R and Stack Overflow.
I want the graph to show lines from t0 to t11. I will probably end up just copying the "Alle" row four times in the original df, but if you know a better way for the future, an answer would be most appreciated.