5

I have a CSV file (crop_calendar.csv) containing information on the development stages of crops in a particular region. Each row has the following structure:

crop_name   sowing_date    emergence_date  flowering_date  maturity_date  harvest_date

which gives for example:

Winter_wheat    18.08   28.08   24.06   30.07   3.08
Winter_rye      18.08   28.08   15.06   23.07   29.07
Spring_wheat    27.04   10.05   1.07    4.08    7.08
Spring_barley   27.04   12.05   27.06   1.08    5.08

Now, I'd like to put that information in a graphic that looks like that: crop calendar example

Any idea how to do it with lots of crops (rows) and at different locations?

WAF
  • 1,141
  • 20
  • 44
  • 3
    Please provide a [reproducible example](http://stackoverflow.com/a/5963610/1412059) of what you have tried. – Roland Jul 23 '13 at 09:53
  • read it as a data.frame, split by location, create the plot for each subset – Dennis Guse Jul 23 '13 at 09:54
  • With a question and half an answer that might be a question as well, it's pretty hard to tell what your question is now and what the bounty is for. Maybe if the answers don't contain enough detail, move your own answer into your question so that people can see what you have so far. Then ask very specific questions? – Andy Clifton Aug 11 '13 at 03:32

4 Answers4

6

Here is an example assuming you have the day.of.year() of sowing and the duration (in days) of the three periods for each crop and each country.

The crop calendar

#making random numbers reproducible
set.seed(12345)
rawdata <- expand.grid(
  Crop = paste("Crop", LETTERS[1:8]), 
  Country = paste("Country", letters[10:13])
)
#day.of.year of sowing
rawdata$Sowing <- runif(nrow(rawdata), min = 0, max = 365)
#number of days until mid season
rawdata$Midseason <- runif(nrow(rawdata), min = 10, max = 30)
#number of days until harvest
rawdata$Harvest <- runif(nrow(rawdata), min = 20, max = 150)
#number of days until end of harvest
rawdata$Harvest.end <- runif(nrow(rawdata), min = 10, max = 40)

dataset <- data.frame(Crop = character(0), Country = character(0), Period = character(0), Duration = numeric(0))

#sowing around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason")])
if(any(last.day >= 365)){
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Sowing",
      Duration = last.day[last.day >= 365] - 365
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Mid-season",
      Duration = rawdata$Harvest[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Harvest",
      Duration = rawdata$Harvest.end[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = NA,
      Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Sowing",
      Duration = 365 - rawdata$Sowing[last.day >= 365]
    )
  )
  rawdata <- rawdata[last.day < 365, ]
}

#mid-season around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason", "Harvest")])
if(any(last.day >= 365)){
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Mid-season",
      Duration = last.day[last.day >= 365] - 365
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Harvest",
      Duration = rawdata$Harvest.end[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = NA,
      Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Sowing",
      Duration = rawdata$Midseason[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Mid-season",
      Duration = 365 - rowSums(rawdata[last.day >= 365, c("Sowing", "Midseason")])
    )
  )
  rawdata <- rawdata[last.day < 365, ]
}


#harvest around new year
last.day <- rowSums(rawdata[, c("Sowing", "Midseason", "Harvest", "Harvest.end")])
if(any(last.day >= 365)){
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Harvest",
      Duration = last.day[last.day >= 365] - 365
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = NA,
      Duration = 365 - rowSums(rawdata[last.day >= 365, c("Midseason", "Harvest", "Harvest.end")])
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Sowing",
      Duration = rawdata$Midseason[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Mid-season",
      Duration = rawdata$Harvest[last.day >= 365]
    )
  )
  dataset <- rbind(
    dataset,
    cbind(
      rawdata[last.day >= 365, c("Crop", "Country")],
      Period = "Harvest",
      Duration = 365 - rowSums(rawdata[last.day >= 365, c("Sowing", "Midseason", "Harvest")])
    )
  )
  rawdata <- rawdata[last.day < 365, ]
}


#no crop around new year
dataset <- rbind(
  dataset,
  cbind(
    rawdata[, c("Crop", "Country")],
    Period = NA,
    Duration = rawdata$Sowing
  )
)
dataset <- rbind(
  dataset,
  cbind(
    rawdata[, c("Crop", "Country")],
    Period = "Sowing",
    Duration = rawdata$Midseason
  )
)
dataset <- rbind(
  dataset,
  cbind(
    rawdata[, c("Crop", "Country")],
    Period = "Mid-season",
    Duration = rawdata$Harvest
  )
)
dataset <- rbind(
  dataset,
  cbind(
    rawdata[, c("Crop", "Country")],
    Period = "Harvest",
    Duration = rawdata$Harvest.end
  )
)
dataset <- rbind(
  dataset,
  cbind(
    rawdata[, c("Crop", "Country")],
    Period = NA,
    Duration = 365 - rowSums(rawdata[, c("Sowing", "Midseason", "Harvest")])
  )
)

Labels <- c("", "Jan.", "Feb.", "Mar.", "Apr.", "May", "Jun.", "Jul.", "Aug.", "Sep.", "Okt.", "Nov.", "Dec.")
Breaks <- cumsum(c(0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31))
ggplot(dataset, aes(x = Crop, y = Duration, colour = Period, fill = Period)) + geom_bar(stat = "identity") + facet_wrap(~Country) + coord_flip() + scale_fill_manual(values = c("Sowing" = "darkgreen", "Mid-season" = "grey", "Harvest" = "yellow")) + scale_colour_manual(values = c("Sowing" = "black", "Mid-season" = "black", "Harvest" = "black"), guide = "none") + scale_y_continuous("", breaks = Breaks, labels = Labels, limits = c(0, 365)) + theme_bw() + theme(axis.text.x = element_text(hjust = 1))
Thierry
  • 18,049
  • 5
  • 48
  • 66
2

To add a legend, place "color=.." inside the aes() call of each geom_linerange() and then add scale_color_identity() with the argument guide="legend" - this will use the color names as the actual colors. With labels= you can change the labels in the legend. To remove the lines between months, add minor_breaks=NULL inside scale_y_date().

ggplot(inDf, aes(x=crop)) +
  geom_linerange(aes(ymin=sowing, ymax=emergence, color="green"), size=5) +
  geom_linerange(aes(ymin=emergence, ymax=flowering, color="green3"), size=5) +
  geom_linerange(aes(ymin=flowering, ymax=maturity, color="yellow"), size=5) +
  geom_linerange(aes(ymin=maturity, ymax=harvesting, color="red"), size=5) +
  coord_flip() +  
  scale_y_date(lim = c(as.Date("2012-08-15"), as.Date("2013-09-01")),
               breaks=date_breaks(width = "1 month"), labels = date_format("%b"),
               minor_breaks=NULL)+
  ggtitle('Crop Calendar')+ xlab("")+ylab("")+
  scale_color_identity("",guide="legend",
                       labels=c("emergence","flowering","maturity","harvesting"))

enter image description here

Didzis Elferts
  • 95,661
  • 14
  • 264
  • 201
1

It is a bit difficult to guess what you want to do. With only 3 dates you cannot reproduce the graph you show (requires 4 dates for each crop). It is also not clear what the numbers represent (presumably weeks?). If it is just a question about plotting, this will get you started. Otherwise, please clarify the question.

df <- read.table(text="crop_name   emergence_date  maturity_date  harvest_date
                 wheat        13.04           25.05          30.06
                 corn         12.02           21.30          23.11", header=TRUE)
require(ggplot2)
ggplot(df, aes(x=crop_name)) +
  geom_linerange(aes(ymin=emergence_date, ymax=maturity_date), color="green3", size=5) +
  geom_linerange(aes(ymin=maturity_date, ymax=harvest_date), color="yellow", size=5) +
  coord_flip() + ylim(0, 52)
shadow
  • 21,823
  • 4
  • 63
  • 77
  • Thanks! Note that dates are in the day.month format. The question now is how to handle that date format with the x axis... – WAF Jul 23 '13 at 11:56
1

Ok so compiling answers and with additional research, here is the solution I ended up with:

inDf <- read.table(text="crop     sowing  emergence  flowering   maturity harvesting
                         Spring barley 27/04/2013 12/05/2013 27/06/2013  1/08/2013  5/08/2013
                         Oats 27/04/2013 10/05/2013 29/06/2013  6/08/2013  8/08/2013
                         Maize 25/05/2013  6/06/2013 18/08/2013 10/09/2013 12/09/2013", header=TRUE)

inDf[, "sowing"]     <- as.Date(inDf[, "sowing"], format = '%d/%m/%Y')
inDf[, "emergence"]  <- as.Date(inDf[, "emergence"], format = '%d/%m/%Y')
inDf[, "flowering"]  <- as.Date(inDf[, "flowering"], format = '%d/%m/%Y')
inDf[, "maturity"]   <- as.Date(inDf[, "maturity"], format = '%d/%m/%Y')
inDf[, "harvesting"] <- as.Date(inDf[, "harvesting"], format = '%d/%m/%Y')

ggplot(inDf, aes(x=crop)) +
geom_linerange(aes(ymin=sowing, ymax=emergence), color="green", size=5) +
geom_linerange(aes(ymin=emergence, ymax=flowering), color="green3", size=5) +
geom_linerange(aes(ymin=flowering, ymax=maturity), color="yellow", size=5) +
geom_linerange(aes(ymin=maturity, ymax=harvesting), color="red", size=5) +
coord_flip() +  scale_y_date(lim = c(as.Date("2012-08-15"), as.Date("2013-09-01")),breaks=date_breaks(width = "1 month"), labels = date_format("%b"))+
ggtitle('Crop Calendar')+ xlab("")+ylab("")

which gives: enter image description here

BUT

I'd like now to add the legend and to remove all the white lines between each month. Any ideas? Thanks

WAF
  • 1,141
  • 20
  • 44