0

My data looks like this:

# Sample data as an R data-frame literal: 25 rows and 13 columns
# (did, fruid, hhid, isenterprise, sale, purchase, cash, asset,
# islivestock, lstype, isagriculture, tsavings, month_year).
# lstype and month_year are character; the remaining columns are numeric.
structure(list(did = c(216L, 221L, 221L, 221L, 221L, 221L, 221L, 
221L, 214L, 214L, 214L, 221L, 221L, 221L, 221L, 221L, 221L, 214L, 
221L, 221L, 221L, 221L, 205L, 221L, 221L), fruid = c(1007L, 1697L, 
1697L, 1698L, 1697L, 1697L, 1697L, 1697L, 2074L, 2074L, 2074L, 
2087L, 2087L, 2087L, 2087L, 2087L, 2087L, 2074L, 2087L, 2087L, 
3884L, 3884L, 2249L, 2087L, 1461L), hhid = c(89L, 5258L, 5256L, 
5192L, 5301L, 5289L, 5296L, 5255L, 16413L, 8911L, 21550L, 5683L, 
12001L, 5761L, 5667L, 5682L, 11988L, 21553L, 11996L, 11986L, 
12158L, 12191L, 10367L, 5676L, 1130L), isenterprise = c(1L, 1L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 1L, 0L, 1L), sale = c(11, 1750, 0, 4000, 0, 0, 
2450, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1600, 0, 0
), purchase = c(255, 750, 0, 3838, 0, 0, 1700, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), cash = c(99, 520, 0, 
1150, 0, 0, 1585, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
2500, 0, 0), asset = c(11, 1750, 0, 4000, 0, 0, 2450, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1600, 0, 0), islivestock = c(0L, 
0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), lstype = c("0", "0", "Goatry", 
"0", "Goatry", "Goatry", "0", "Goatry", "0", "0", "0", "0", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0"), 
    isagriculture = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), tsavings = c(5L, 1120L, 1200L, 2150L, 2700L, 4220L, 400L, 
    400L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 840L, 0L, 0L), month_year = c("Dec - 2019", "Dec - 2019", 
    "Dec - 2019", "Dec - 2019", "Dec - 2019", "Dec - 2019", "Dec - 2019", 
    "Dec - 2019", "Jan - 2020", "Jan - 2020", "Jan - 2020", "Jan - 2020", 
    "Jan - 2020", "Jan - 2020", "Jan - 2020", "Jan - 2020", "Jan - 2020", 
    "Jan - 2020", "Jan - 2020", "Jan - 2020", "Jan - 2020", "Jan - 2020", 
    "Jan - 2020", "Jan - 2020", "Jan - 2020")), row.names = c(NA, 
25L), class = "data.frame")

The output that is expected is this:

did fruid month_year hhid times_visited isenterprise_count t_sale t_purchase t_cash t_asset t_saving islivestock_count t_sale t_purchase t_cash t_asset t_saving isagriculture_count t_sale t_purchase t_cash t_asset t_saving.

To explain the output: I want to count the number of occurrences of hhid for each month_year and, along with that, sum the sale, purchase, cash, asset and savings columns.

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
Rahul
  • 25
  • 5

1 Answer

1

You can group by the unique values and sum selected columns.

library(dplyr)

# Count the rows and total the numeric columns for every
# did / fruid / month_year combination.
df %>%
  group_by(did, fruid, month_year) %>%
  summarise(
    count = n(),
    # The two ranges skip lstype (character), which sits between
    # islivestock and isagriculture in the data.
    # na.rm is passed inside a lambda: supplying extra arguments through
    # across()'s `...` is deprecated as of dplyr 1.1.0.
    across(
      c(sale:islivestock, isagriculture:tsavings),
      ~ sum(.x, na.rm = TRUE)
    ),
    # Return an ungrouped result instead of one still grouped by did, fruid.
    .groups = "drop"
  )
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • Thanks Ronak. I want to capture the sale, purchase, cash, asset and savings for each of ismicroenterprise, islivestock and isagriculture. I have been able to do it by filtering on all three and creating 3 data frames, but I need to do it in a single data frame, grouping by did, fruid, month_year and hhid, then counting hhid (hhid_count) for each month_year. Please let me know if there's an issue. – Rahul Dec 01 '20 at 06:14
  • 1
    You should then include `isenterprise` and the other indicator variables in `group_by`. Try: `df %>% group_by(did, fruid, month_year, isenterprise, islivestock, isagriculture) %>% summarise(count = n(), across(c(sale:asset, tsavings), sum, na.rm = TRUE))` – Ronak Shah Dec 01 '20 at 06:21
  • Thanks. It's working. I also want to count the hhid tagged with each of the three flags (isenterprise, say hh_enterprise as the new column name; islivestock, say hh_livestock; isagriculture, say hh_agri). This would help in identifying the number of hhid involved in different activities. – Rahul Dec 01 '20 at 07:16
  • I think you should ask that as a new question instead of extending it. – Ronak Shah Dec 01 '20 at 08:22