I have a data frame with several distinct columns.
Each column has several different gene names.
I would like to know:
- whether any gene name appears more than once anywhere in the data frame (within a column or across columns),
- and, if possible, how many times each gene name occurs in total.
This is part of my data frame:
# Example data: a character data frame with 5 rows (row names 2 to 6) and
# one column per numeric identifier. Each column holds gene names; columns
# made up entirely of "" contain no gene names.
# NOTE(review): the value "5-??" in column `101728` looks like a mis-encoded
# name -- confirm against the original data.
DS_struct <- structure(
  list(
    `12941` = rep("", 5L),
    `14520` = c("ABAT", "ABCA6", "ABCA8", "ABCB4", "ABCG2"),
    `22405` = c("ACSL4", "ADFP", "ADH1A", "ADH1B", "ADH1C"),
    `25097` = c("AATF", "ABCB8", "ABLIM3", "ACCN2", "ACSM3"),
    `33006` = c("ADAMTS1", "ADAMTS13", "ADGRA3", "ADGRG7", "ADH1B"),
    `36376` = c("ACAA2", "ACACB", "ACAD11", "ACOT12", "ACSL1"),
    `39791` = c("ABAT", "ACACB", "ACSL4", "ACSM5", "ADAMTSL2"),
    `41804` = c("A2M-AS1", "A2MP1", "AADAT", "ABCA8", "ACADL"),
    `46408` = c("A1CF", "A2M", "AADAT", "AASS", "ABAT"),
    `50579` = c("AASS", "ABAT", "ABCA8", "ABCB10", "ABLIM2"),
    `55191` = rep("", 5L),
    `57555` = rep("", 5L),
    `57957` = c("ACSL4", "ACSM3", "ADAMTSL2", "ADGRG2", "ADH1B"),
    `57958` = rep("", 5L),
    `58043` = rep("", 5L),
    `60502` = c("ABAT", "ABCA6", "ABCA8", "ABCB4", "ABT1"),
    `62232` = c("AADAT", "AASS", "AASS", "ABCA8", "ABCC4"),
    `76427` = c("ADGRG7", "ADIRF", "ALPL", "ANXA10", "ASPDH"),
    `84005` = rep("", 5L),
    `84402` = c("AADAT", "AASS", "ABAT", "ABCA6", "ABCA8"),
    `89186` = rep("", 5L),
    `101685` = c("AADAT", "AASS", "ABAT", "ABCA9", "ABCC4"),
    `101728` = c("5-??", "5_8S_rRNA", "A1BG", "A2M", "AACS"),
    `113996` = rep("", 5L),
    `117361` = rep("", 5L),
    `121248` = c("ABI3BP", "ACADL", "ACOT12", "ACSL4", "ACSM3"),
    `136247` = rep("", 5L),
    `138178` = rep("", 5L),
    `166163` = rep("", 5L)
  ),
  row.names = 2:6,
  class = "data.frame"
)