Sum variables by group

Question

I want to reduce lines of code to group different variables and add without removing the variables from my base.

This is what I did (It works but the code is a bit long)

test1
names(test1)
#[1] "id"                            "DPA_DESPAR"                   
#[3] "DPA_VALOR"                     "DPA_ANIO"                     
#[5] "DPA_CANTON"                    "DPA_DESCAN"                   
#[7] "DPA_PROVIN"                    "DPA_DESPRO"                   
#...
#[19] "exp_omega"                     "total_empl"       
#...         
#[23] "geometry"                     

ss1= test1 %>% 
  group_by(DPA_CANTON) %>% 
  summarise(tot1= sum(total_empl)) 

ss2 = test1 %>% 
  group_by(id) %>% 
  summarise(tot2 = sum(total_empl)) 

test1 = left_join(test1, ss1, by = 'DPA_CANTON')  %>% 
  left_join(., ss2, by = 'id')

test1[1:10,21:22]
#tot1                      tot2
#1            37781         23049
#2            37781          1456
#3            37781          1198
#4            37781          2253
#5            37781           800
#6            37781          1705
#7            37781           615
#8            37781          4663
#9            37781          2042
#10            6593          5022

This does not work

I used this

aggregate(test1$total_empl, by=list(Category=test1[c(1,5)]), FUN=sum)
#Error in aggregate.data.frame(as.data.frame(x), ...) : 
#  arguments must have same length

Data

test1 = structure(list(id= c("020150", "020151", 
"020153", "020155", "020156", "020157", "020158", "020159", "020160", 
"020250", "020251", "020350", "020351", "020353", "020354", "020450", 
"020550", "020551", "020552", "020553"), DPA_CANTON = c("0201", 
"0201", "0201", "0201", "0201", "0201", "0201", "0201", "0201", 
"0202", "0202", "0203", "0203", "0203", "0203", "0204", "0205", 
"0205", "0205", "0205"), total_empl= c(23049, 1456, 
1198, 2253, 800, 1705, 615, 4663, 2042, 5022, 1571, 2481, 1077, 
973, 441, 4689, 4884, 875, 1046, 341)), row.names = c(NA, 20L
), class = "data.frame")

score 3 · Answer 1 · answered Sep 26 '20 at 19:37

You could stick to base:

transform(
  test1,
  tot1 = ave(total_empl, DPA_CANTON, FUN = sum),
  tot2 = ave(total_empl, id, FUN = sum)
)

Output:

       id DPA_CANTON total_empl  tot1  tot2
1  020150       0201      23049 37781 23049
2  020151       0201       1456 37781  1456
3  020153       0201       1198 37781  1198
4  020155       0201       2253 37781  2253
5  020156       0201        800 37781   800
6  020157       0201       1705 37781  1705
7  020158       0201        615 37781   615
8  020159       0201       4663 37781  4663
9  020160       0201       2042 37781  2042
10 020250       0202       5022  6593  5022
11 020251       0202       1571  6593  1571
12 020350       0203       2481  4972  2481
13 020351       0203       1077  4972  1077
14 020353       0203        973  4972   973
15 020354       0203        441  4972   441
16 020450       0204       4689  4689  4689
17 020550       0205       4884  7146  4884
18 020551       0205        875  7146   875
19 020552       0205       1046  7146  1046
20 020553       0205        341  7146   341

score 1 · Answer 2 · answered Sep 26 '20 at 20:36

1

if you open to use data.table then this gives the same result:

library(data.table)
setDT(test1)
test1[, tot1 := sum(total_empl), by = DPA_CANTON][, tot2 := sum(total_empl), by = id]
test1[1:10, c("tot1", "tot2")]

answered Sep 26 '20 at 20:36

Will

910
7
17

jdobres · Accepted Answer · 2020-09-26T19:47:52.093

0

Sticking to the tidyverse, you could use mutate instead of summarize. mutate allows you to calculate new columns while preserving all of the original data.

test1 <- test1 %>% 
  group_by(DPA_CANTON) %>% 
  mutate(tot1 = sum(total_empl)) %>% 
  group_by(id) %>% 
  mutate(tot2 = sum(total_empl))

edited Sep 26 '20 at 19:47

answered Sep 26 '20 at 19:42

jdobres

11,339
1
17
37

Sum variables by group

3 Answers3