1

I would to know per harvestYear, the number of month where vhi falls below 40.

example: for year 1982=:3; year 1983= 3

I would like to keep the dataset as it is. Just to add an additional column.

following this post, I have tried this:

require(zoo)
df2$yearmon <- as.yearmon(paste(df2$harvestYear, df2$Month), "%Y %m")

df2= ddply(df2, .(yearmon), transform, drought_months=sum(vhi <= 40) )

Then I would sum the number of month by grouping. But it does not work.

df3= df2 %>%
      group_by(gid, country, xcoord, ycoord, harvestYear, vhi) %>%
      summarise(drought.vhi.month = sum(drought_months))%>%
      ungroup()

Dataset:

structure(list(gid = c(100468L, 100468L, 100468L, 100468L, 100468L, 
100468L, 100468L, 100468L, 100468L, 100468L, 104078L, 104078L, 
104078L, 104078L, 104078L, 104078L, 104078L, 104078L, 104078L, 
104078L, 104078L, 115615L, 115615L, 115615L, 115615L, 115615L, 
115615L, 115615L, 115615L, 115615L, 115615L, 115615L), country = c("Namibia", 
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
"Namibia", "Namibia", "Congo, Democratic Republic of (Zaire)", 
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
), xcoord = c("13.75", "13.75", "13.75", "13.75", "13.75", "13.75", 
"13.75", "13.75", "13.75", "13.75", "18.75", "18.75", "18.75", 
"18.75", "18.75", "18.75", "18.75", "18.75", "18.75", "18.75", 
"18.75", "27.25", "27.25", "27.25", "27.25", "27.25", "27.25", 
"27.25", "27.25", "27.25", "27.25", "27.25"), ycoord = c("-20.25", 
"-20.25", "-20.25", "-20.25", "-20.25", "-20.25", "-20.25", "-20.25", 
"-20.25", "-20.25", "-17.75", "-17.75", "-17.75", "-17.75", "-17.75", 
"-17.75", "-17.75", "-17.75", "-17.75", "-17.75", "-17.75", "-9.75", 
"-9.75", "-9.75", "-9.75", "-9.75", "-9.75", "-9.75", "-9.75", 
"-9.75", "-9.75", "-9.75"), Time = structure(c(4352, 4383, 4717, 
4414, 4442, 4473, 4748, 5082, 4779, 4807, 9862, 10135, 10166, 
10196, 9893, 9921, 9952, 10105, 10227, 10500, 10531, 13057, 13088, 
13118, 12904, 12935, 12965, 12996, 13027, 13422, 13453, 13483
), class = "Date"), harvestYear = c(1982L, 1982L, 1983L, 1982L, 
1982L, 1982L, 1983L, 1984L, 1983L, 1983L, 1997L, 1998L, 1998L, 
1998L, 1997L, 1997L, 1997L, 1998L, 1998L, 1999L, 1999L, 2005L, 
2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2005L, 2006L, 2006L, 
2006L), Month = c(12L, 1L, 12L, 2L, 3L, 4L, 1L, 12L, 2L, 3L, 
1L, 10L, 11L, 12L, 2L, 3L, 4L, 9L, 1L, 10L, 11L, 10L, 11L, 12L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), vhi = c("51.0890450353229", 
"32.441240310669", "40.847339630127", "21.9409523010254", "23.7816608428955", 
"46.6207218170166", "23.5423679351807", "42.2399349212647", "19.0511212348938", 
"10.1214292049408", "59.2775287628174", "68.3184223175049", "63.1654891967773", 
"46.8237014770508", "66.5376491546631", "68.1310760498047", "70.3945808410645", 
"70.0717346191406", "38.8603992462158", "42.2310523986816", "47.7076560974121", 
"38.2882362365723", "32.5415182113648", "36.3100366592407", "48.1315612792969", 
"49.0768402099609", "52.1510925292969", "53.8123741149902", "50.4373531341553", 
"51.5549461364746", "51.573205947876", "46.0322494506836")), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 40000L, 40001L, 40002L, 
40003L, 40004L, 40005L, 40006L, 40007L, 40008L, 40009L, 40010L, 
200000L, 200001L, 200002L, 200003L, 200004L, 200005L, 200006L, 
200007L, 200008L, 200009L, 200010L), class = "data.frame")
Darren Tsai
  • 32,117
  • 5
  • 21
  • 51
Myr TH
  • 175
  • 1
  • 9

1 Answers1

1

I see 3 drougth month (vhi <= 40) in the year 1982. Also the value in vhi is character so that needs to be changed to numeric first.

df2 %>%
  mutate(vhi = as.numeric(vhi)) %>%
  filter(harvestYear == 1982)

#     gid country xcoord ycoord       Time harvestYear Month      vhi
#1 100468 Namibia  13.75 -20.25 1981-12-01        1982    12 51.08905
#2 100468 Namibia  13.75 -20.25 1982-01-01        1982     1 32.44124
#3 100468 Namibia  13.75 -20.25 1982-02-01        1982     2 21.94095
#4 100468 Namibia  13.75 -20.25 1982-03-01        1982     3 23.78166
#5 100468 Namibia  13.75 -20.25 1982-04-01        1982     4 46.62072

You can try -

library(dplyr)

df2 %>%
  mutate(vhi = as.numeric(vhi)) %>%
  group_by(harvestYear) %>%
  mutate(drought_months = sum(vhi <= 40))

#     gid country xcoord ycoord Time       harvestYear Month   vhi drought_months
#    <int> <chr>   <chr>  <chr>  <date>           <int> <int> <dbl>          <int>
# 1 100468 Namibia 13.75  -20.25 1981-12-01        1982    12  51.1             3
# 2 100468 Namibia 13.75  -20.25 1982-01-01        1982     1  32.4             3
# 3 100468 Namibia 13.75  -20.25 1982-12-01        1983    12  40.8             3
# 4 100468 Namibia 13.75  -20.25 1982-02-01        1982     2  21.9             3
# 5 100468 Namibia 13.75  -20.25 1982-03-01        1982     3  23.8             3
# 6 100468 Namibia 13.75  -20.25 1982-04-01        1982     4  46.6             3
# 7 100468 Namibia 13.75  -20.25 1983-01-01        1983     1  23.5             3
# 8 100468 Namibia 13.75  -20.25 1983-12-01        1984    12  42.2             0
# 9 100468 Namibia 13.75  -20.25 1983-02-01        1983     2  19.1             3
#10 100468 Namibia 13.75  -20.25 1983-03-01        1983     3  10.1             3
# … with 22 more rows
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213