1

Here is part of my data frame.

> df
  ACSK AEUJ AEXF AIWT ALGN AMFQ             Pathway
1    1    0    0    1    0    0    Genome_integrity
2    0    0    0    1    0    1                PI3K
3    0    0    0    0    0    0                  TF
4    0    0    0    0    1    0             RTK_RAS
5    0    0    0    0    1    0                PI3K
6    0    0    0    1    1    0 Epigenetic_modifier
7    0    0    1    0    0    0                PI3K

I want to merge the rows that have the same value in the "Pathway" column and, for each of the other columns, calculate the sum over the merged rows. Below is the expected output.

> df2
  ACSK AEUJ AEXF AIWT ALGN AMFQ             Pathway
1    1    0    0    1    0    0    Genome_integrity
2    0    0    1    1    1    1                PI3K
3    0    0    0    0    0    0                  TF
4    0    0    0    0    1    0             RTK_RAS
5    0    0    0    1    1    0 Epigenetic_modifier

DATA

# Example data frame from the printout above (looks like dput() output):
# seven rows, six numeric columns and a character Pathway column.
# The expression is not assigned here; assign its value to a name
# (the snippets on this page refer to it as df) before running them.
structure(list(ACSK = c(1, 0, 0, 0, 0, 0, 0), AEUJ = c(0, 0, 
0, 0, 0, 0, 0), AEXF = c(0, 0, 0, 0, 0, 0, 1), AIWT = c(1, 1, 
0, 0, 0, 1, 0), ALGN = c(0, 0, 0, 1, 1, 1, 0), AMFQ = c(0, 1, 
0, 0, 0, 0, 0), Pathway = c("Genome_integrity", "PI3K", "TF", 
"RTK_RAS", "PI3K", "Epigenetic_modifier", "PI3K")), row.names = c(NA, 
7L), class = "data.frame")
Lin Caijin
  • 599
  • 4
  • 10

2 Answers

4

You can use across(everything()) from dplyr inside summarise() to sum every non-grouping column within each Pathway group.

library(dplyr)

# Sum every column within each Pathway group.
# `df` is the data frame from the question. Inside summarise(),
# across(everything()) covers all columns except the grouping column,
# so each remaining column is reduced with sum() per group.
df %>%
  group_by(Pathway) %>%
  summarise(across(everything(), sum))

  Pathway              ACSK  AEUJ  AEXF  AIWT  ALGN  AMFQ
  <chr>               <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Epigenetic_modifier     0     0     0     1     1     0
2 Genome_integrity        1     0     0     1     0     0
3 PI3K                    0     0     1     1     1     1
4 RTK_RAS                 0     0     0     0     1     0
5 TF                      0     0     0     0     0     0
deschen
  • 10,012
  • 3
  • 27
  • 50
Park
  • 14,771
  • 6
  • 10
  • 29
0

A base method:

# Formula interface: sum all remaining columns within each Pathway group.
aggregate(. ~ Pathway, data = df, FUN = sum)

#              Pathway ACSK AEUJ AEXF AIWT ALGN AMFQ
#1 Epigenetic_modifier    0    0    0    1    1    0
#2    Genome_integrity    1    0    0    1    0    0
#3                PI3K    0    0    1    1    1    1
#4             RTK_RAS    0    0    0    0    1    0
#5                  TF    0    0    0    0    0    0

Also,

# Sum every column except Pathway within each Pathway group.
# Passing the data-frame columns directly keeps their names, and naming the
# element of the `by` list labels the group column, so the result needs no
# manual renaming and does not depend on hard-coded column positions.
df1 <- aggregate(
  df[setdiff(names(df), "Pathway")],
  by = list(Pathway = df$Pathway),
  FUN = sum
)

#              Pathway ACSK AEUJ AEXF AIWT ALGN AMFQ
#1 Epigenetic_modifier    0    0    0    1    1    0
#2    Genome_integrity    1    0    0    1    0    0
#3                PI3K    0    0    1    1    1    1
#4             RTK_RAS    0    0    0    0    1    0
#5                  TF    0    0    0    0    0    0
DeBARtha
  • 460
  • 6
  • 17