1

I want to add a column containing the sum of `time` for each `condition` within each `id` group. Here is my data, followed by what I have tried so far:

# Example input: 14 observations spread over four ids (4, 3, 3 and 4 rows).
# Each row carries a binary `condition`, a running `count`, a first-exposure
# flag, an `outcome` flag and a `time` value.
data <- data.frame(
  id           = rep(c(1, 2, 3, 4), times = c(4, 3, 3, 4)),
  condition    = c(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0),
  count        = c(1, 2, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 0),
  firstexosure = c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0),
  outcome      = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0),
  time         = c(
    100, 250, 220, 300, 240, 380, 150,
    200, 320, 360, 100, 210, 220, 235
  )
)




# Total `time` per id/condition pair. summarise() returns one row per group,
# so the original rows (and the other columns) are not kept in the result.
data <- data %>%
  group_by(id, condition) %>%
  summarise(sum = sum(time))

I would like to add one more column like this.

# Desired result: the original 14 rows, plus a `sum` column that repeats the
# total of `time` for the row's id/condition group on every row of that group.
data <- data.frame(
  id           = rep(c(1, 2, 3, 4), times = c(4, 3, 3, 4)),
  condition    = c(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0),
  count        = c(1, 2, 0, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 0),
  firstexosure = c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0),
  outcome      = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0),
  time         = c(
    100, 250, 220, 300, 240, 380, 150,
    200, 320, 360, 100, 210, 220, 235
  ),
  sum          = c(
    350, 350, 520, 520, 620, 620, 150,
    520, 520, 360, 310, 310, 455, 455
  )
)

How can I write this in R?

camille
  • 16,432
  • 18
  • 38
  • 60
user224050
  • 317
  • 3
  • 10

1 Answer

1

There are many ways to do this.

We can use base R.

# ave() applies FUN within each id/condition combination and returns a vector
# as long as its input, so every row keeps its place and receives the total
# of `time` for its own group.
data[["sum"]] <- ave(
  data[["time"]],
  data[["id"]],
  data[["condition"]],
  FUN = sum
)
data
#    id condition count firstexosure outcome time sum
# 1   1         1     1            1       0  100 350
# 2   1         1     2            0       0  250 350
# 3   1         0     0            0       0  220 520
# 4   1         0     0            0       1  300 520
# 5   2         1     1            1       0  240 620
# 6   2         1     2            0       0  380 620
# 7   2         0     0            0       1  150 150
# 8   3         1     1            1       0  200 520
# 9   3         1     2            0       0  320 520
# 10  3         0     0            0       1  360 360
# 11  4         1     1            1       0  100 310
# 12  4         1     2            0       0  210 310
# 13  4         0     0            0       1  220 455
# 14  4         0     0            0       0  235 455

Or we can use the data.table package.

library(data.table)

# Convert `data` to a data.table by reference, then add `sum` in place:
# `:=` writes the per-group total of `time` onto every row of each
# id/condition group without collapsing any rows.
setDT(data)
data[, sum := sum(time), by = .(id, condition)]
# `:=` returns invisibly; the trailing empty `[]` makes the table print.
data[]
#     id condition count firstexosure outcome time sum
#  1:  1         1     1            1       0  100 350
#  2:  1         1     2            0       0  250 350
#  3:  1         0     0            0       0  220 520
#  4:  1         0     0            0       1  300 520
#  5:  2         1     1            1       0  240 620
#  6:  2         1     2            0       0  380 620
#  7:  2         0     0            0       1  150 150
#  8:  3         1     1            1       0  200 520
#  9:  3         1     2            0       0  320 520
# 10:  3         0     0            0       1  360 360
# 11:  4         1     1            1       0  100 310
# 12:  4         1     2            0       0  210 310
# 13:  4         0     0            0       1  220 455
# 14:  4         0     0            0       0  235 455
www
  • 38,575
  • 12
  • 48
  • 84