0

I am trying to compute the average of SF_Plante_Verte and SF_Plante_Totale for each Date_obs.

# Reproducible example data: 18 rows, 7 columns. Built with structure() so the
# tibble classes are attached without needing any package to be loaded.
df <- structure(
  list(
    Pos_heliaphen = c(
      "X47", "W17", "Z17",
      "X47", "Y19", "Y40",
      "X47", "Y19", "Y40", "Z17", "Z31",
      "X47", "Y19", "Y40", "Z31",
      "X47", "Z17", "Z31"
    ),
    # Both columns are constant across all 18 rows.
    traitement = rep("WW", 18L),
    Variete = rep("Blancas", 18L),
    # Observation labels appear in consecutive runs of 3, 3, 5, 4 and 3 rows.
    Date_obs = rep(
      c(
        "D11_04/06/2021", "D12_07/06/2021", "D23_25/06/2021",
        "D24_28/06/2021", "D25_29/06/2021"
      ),
      times = c(3L, 3L, 5L, 4L, 3L)
    ),
    SF_Plante_Totale = c(
      46473, 44589.3, 43134,
      166645.5, 119962.5, 93061.5,
      483583.8, 313985.7, 273432.6, 414871.8, 426766.2,
      539410.2, 337417.5, 273432.6, 474915,
      539410.2, 414871.8, 474915
    ),
    SF_Plante_Verte = c(
      46473, 44589.3, 43134,
      162512.7, 119962.5, 93061.5,
      462655.2, 293367.9, 238373.1, 363123.6, 407572.2,
      473793.6, 316799.7, 238373.1, 420682.5,
      473793.6, 363123.6, 420682.5
    ),
    SF_Plante_senescence = c(
      0, 0, 0,
      4132.8, 0, 0,
      20928.6, 20617.8, 35059.5, 51748.2, 19194,
      65616.6, 20617.8, 35059.5, 54232.5,
      65616.6, 51748.2, 54232.5
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -18L)
)

With the code below I can draw the lines, but I want dotted, smooth curves instead of polylines (no straight segments between points). I also can't get a legend to appear.

Could anyone please help me solve my problem? Thank you in advance!

# Plot the per-Date_obs mean of SF_Plante_Totale and SF_Plante_Verte from the
# wide data: one point layer and one line layer per column.
# Colours and fills are fixed values set outside aes(), so ggplot2 has nothing
# to build a legend from.
ggplot(df, aes(x = Date_obs)) + 
  # Mean of SF_Plante_Totale at each Date_obs, drawn as filled points.
  stat_summary(aes(y = SF_Plante_Totale,group=1), fun =mean, colour="white",shape=21,size=4,fill="steelblue",geom="point",group=1)+
  # Same means drawn with the "smooth" geom; per the description above this
  # shows up as straight segments between the means, not a fitted curve.
  stat_summary(aes(y = SF_Plante_Totale,group=1), fun =mean,colour="steelblue", geom="smooth", group=1)+
  # Mean of SF_Plante_Verte at each Date_obs, drawn as filled points.
  # NOTE(review): group is 1 inside aes() but 2 as a fixed value here, unlike
  # the other layers — confirm whether that difference is intended.
  stat_summary(aes(y = SF_Plante_Verte,group=1), fun =mean, colour="white",shape=21,size=4,fill="tomato",geom="point",group=2)+
  # Line through the SF_Plante_Verte means, same approach as the Totale line.
  stat_summary(aes(y = SF_Plante_Verte,group=1), fun =mean,colour="tomato", geom="smooth", group=1)+
  # Rotate the x-axis labels by 45 degrees.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

enter image description here

Chouette
  • 153
  • 7
  • Does this answer your question? [Add legend to ggplot2 line plot](https://stackoverflow.com/questions/10349206/add-legend-to-ggplot2-line-plot) – Dan Adams Apr 07 '22 at 02:54
  • The link refers to `geom_line`. My graph also has `geom_point`, so I guess that is why it didn't work. @Dan Adams – Chouette Apr 07 '22 at 14:44

1 Answer

2

There are a few issues here.

  1. You don't get a legend because you specify the color manually to each layer outside of aes(). {ggplot2} won't automatically make a legend for things specified outside of aes().
  2. You can't access the different groups as a single variable to provide in aes() because your data is currently in 'wide' format where you have multiple columns containing the same type of data and the column names are simple metadata distinguishing those measurements. The solution is to convert to 'long' format. For this, I use tidyr::pivot_longer().
  3. To get your desired colors you can just use scale_color_manual().
  4. If you want to fit a smoothed curve rather than just a point-to-point line, you can use loess smoothing and simply adjust the span parameter to control how smooth or bumpy it is.
  5. You can just change the linetype to get a dotted/dashed line. See here for more info on available options to control linetype.
  6. If you want to separately visualize data from different Pos_heliaphen groups, it will help to add another aesthetic to distinguish them. For example you could use shape to distinguish the points and linetype to distinguish the smoothed lines.
  7. I used interaction() to create all the combinations of the grouping variables.
library(tidyverse)

# Reshape the SF_* measurement columns to long format so the measurement type
# becomes a single variable (`name`). Mapping `name` to colour inside aes() is
# what makes ggplot2 generate a legend.
df %>%
  # Select by name, not position, so extra columns in df cannot shift the
  # selection.
  select(Pos_heliaphen, Date_obs, SF_Plante_Totale, SF_Plante_Verte) %>%
  pivot_longer(starts_with("SF")) %>%
  # One group per measurement type x Pos_heliaphen combination.
  ggplot(aes(Date_obs, value, color = name,
             group = interaction(name, Pos_heliaphen))) +
  # Summarised points; the mean is requested explicitly instead of relying on
  # the stat's default summary function.
  geom_point(stat = "summary", fun = mean, size = 4,
             aes(shape = Pos_heliaphen)) +
  # Loess curve per group; `span` controls how smooth or bumpy the curve is.
  geom_smooth(method = "loess", se = FALSE, span = 5,
              aes(linetype = Pos_heliaphen)) +
  # Named values tie each colour to its series regardless of level order.
  scale_color_manual(
    values = c(SF_Plante_Totale = "steelblue", SF_Plante_Verte = "tomato")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Created on 2022-04-07 by the reprex package (v2.0.1)

Dan Adams
  • 4,971
  • 9
  • 28
  • Hello, thank you so much for your answer! It did give me some inspiration! I uploaded more complete data. As you can see, there are more columns than before. I tried to use `pivot_longer(-Date_obs, names_to = c("SF_Plante_Verte", "SF_Plante_Totale"))` to select these two columns, but it didn't work. – Chouette Apr 07 '22 at 14:09
  • Also, there are several different **Pos_heliaphen** values for the same **Date_obs**. I want to calculate the mean value for each **Date_obs** and use it for the dotted curve (instead of polyline segments). Sorry to bother you again! @Dan Adams – Chouette Apr 07 '22 at 14:22
  • `SF_Plante_Totale` and `SF_Plante_Verte` are currently `datetime` format. Is that your intention? When you plot it it is being treated as a plain `numeric` anyhow so you should be explicit if you're actually measuring time in those variables or if some other units. – Dan Adams Apr 07 '22 at 15:15
  • No, I didn't intend to have datetime format. I have uploaded the newest data, but I still can't figure out how to achieve what I want. @Dan Adams – Chouette Apr 07 '22 at 15:27
  • It will help if you can give a better idea of your expected output: either an example graph from somewhere else, a hand-drawn sketch, or just a more explicit written description of how it should look, with notes on which aesthetic parameters are mapped to which data features. – Dan Adams Apr 07 '22 at 15:30
  • It works very well! Thank you so so so much!@Dan Adams – Chouette Apr 08 '22 at 10:06