0

I have daily rainfall data, which I have converted to year-wise cumulative values using the following code:

library(seas)
library(data.table)
library(ggplot2)

# Load the mscdata example dataset and keep the records for id 1108447
data(mscdata)
dat <- mksub(mscdata, id = 1108447)

# Day of the year, parsed from the observation date ("%j" format)
dat$julian.date <- as.numeric(format(dat$date, format = "%j"))

# Running rainfall total that restarts for every year
DT <- data.table(dat)
DT[, Cum.Sum := cumsum(rain), by = year]

# Two-column frame used by the plotting code: day of year and running total
df <- data.frame(day = dat$julian.date, cumulative = DT$Cum.Sum)

But when I try to plot it, I get weird output like this:

# Base R: sort the rows by day of year, then draw all of them as one line
df <- df[order(df$day), ]
with(
  df,
  plot(day, cumulative, type = "l", xlab = "Day", ylab = "Cumulative rainfall")
)

enter image description here

I get the same result using ggplot2:

# ggplot2 version: one line layer over all rows, with no grouping aesthetic
ggplot(data = df, mapping = aes(x = day, y = cumulative)) +
  geom_line()

enter image description here

But I want a separate line for every year, maybe in grey, and the mean value over the years in red, like in the following figure:

enter image description here

How to achieve this?

UseR10085
  • 7,120
  • 3
  • 24
  • 54

2 Answers2

1

So, as you can see, you are missing the group aesthetic in geom_line. Without a group, ggplot connects all the points into a single line ordered along the x-axis. Here is an example that adds 'year' as the group and calculates the mean value for each day.

library(reshape2)

# Rebuild the per-year cumulative rainfall series.
data(mscdata)
dat <- mksub(mscdata, id = 1108447)
dat$julian.date <- as.numeric(format(dat$date, "%j"))
DT <- data.table(dat)
DT[, Cum.Sum := cumsum(rain), by = list(year)]

# Long table: one row per (day, year) with the cumulative total in `value`.
dt <- cbind.data.frame(
  day = dat$julian.date,
  cumulative = DT$Cum.Sum,
  year = DT$year
)
TB <- melt(dt, id.vars = c("day", "year"))

# Mean cumulative rainfall for each day of the year across all years.
# Aggregating on the actual `day` values keeps every mean attached to the
# right day regardless of row order; the formula interface drops rows whose
# value is NA before averaging.
Mean_l <- aggregate(value ~ day, data = TB, FUN = mean)
names(Mean_l) <- c("day", "Mean_l")

# Attach the across-year mean to every row so both layers share one data set.
TB_f <- data.frame(TB, avr = Mean_l$Mean_l[match(TB$day, Mean_l$day)])

# One line per year (group = year) plus the across-year mean in red.
ggplot(TB_f, aes(day, value)) +
  geom_line(aes(group = year)) +
  geom_line(aes(y = avr), color = "red") +
  theme_light()

enter image description here

UseR10085
  • 7,120
  • 3
  • 24
  • 54
karobben
  • 21
  • 3
  • Thank you very much. Your answer works but it appears a bit messy which can be reduced using `tidyverse` package. – UseR10085 Jun 25 '20 at 11:38
1

Add the group aesthetic to tell ggplot to group by the year and add stat_summary to draw the red line (without the grouping).

library(ggplot2)

# One grey line per year (group = year) with the across-year mean in red.
ggplot(DT, aes(x = julian.date, y = Cum.Sum, group = year)) +
  geom_line(col = "grey") +
  labs(x = "Date", y = "Cumulative sum") +
  # group = NULL removes the inherited per-year grouping, so the mean is taken
  # over all years at each day. stat_summary() has no `se` parameter, so that
  # argument is not passed.
  stat_summary(
    aes(group = NULL),
    fun = "mean", geom = "line", col = "red", lwd = 1
  )

enter image description here

For base graphics, it's a little more involved:

# Plotting data with the year column that is needed to split the series;
# built from DT so the snippet does not depend on a `year` column in `df`.
plot_df <- data.frame(
  day = DT$julian.date,
  cumulative = DT$Cum.Sum,
  year = DT$year
)

# Tighten the margins for this figure and remember the previous settings.
op <- par(mar = c(4, 3.5, 1, 1))

# Empty frame sized to the full data range; the lines are added afterwards.
plot(plot_df$day, plot_df$cumulative, type = "n",
     xlab = "Day", ylab = "Cumulative rainfall", las = 1)
grid()

# One thin grey line per year.
for (yr in split(plot_df, plot_df$year)) {
  lines(yr$day, yr$cumulative, col = "grey", lwd = 0.5)
}

# Mean cumulative rainfall per day of year across all years, drawn in red.
with(aggregate(cumulative ~ day, FUN = mean, data = plot_df),
     lines(x = day, y = cumulative, lwd = 2, col = "red"))

# Restore the graphical parameters that were in effect before this figure.
par(op)
Edward
  • 10,360
  • 2
  • 11
  • 26
  • Excellent answer... Please visit [this](https://stackoverflow.com/questions/62574268/applying-yearwise-segmented-regression-in-r), I think you will be able to answer that question also. – UseR10085 Jun 25 '20 at 12:16