I am trying to unnest a dataset that I have produced with pivot_wider, in which there are multiple columns that need to be unnested. On the full dataset, the unnest function is not working (I get an error: >Error: Incompatible lengths: 3, 2.) so I tried a workaround Part of the dataset:
my_data <- structure(list(RNAcentral_id = c("URS000000C731", "URS000000C731",
"URS000000C731", "URS000000C731", "URS000001F3AA", "URS000001F3AA",
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA",
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6",
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6",
"URS00000527A6", "URS000007CAC8", "URS000007CAC8", "URS000007CAC8",
"URS000007CAC8", "URS000007CAC8", "URS000007DA54", "URS000007DA54",
"URS000007DA54", "URS000007DA54", "URS000007DA54", "URS000007DA54",
"URS000007DA54", "URS000007DA54", "URS000007F1D7", "URS000007F1D7",
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7",
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7",
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS0000088F47",
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS00000B589B",
"URS00000B589B", "URS00000B589B", "URS00000B589B", "URS00000B589B",
"URS00000B589B", "URS00000B589B"), Database = c("ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "LNCIPEDIA",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENECARDS", "LNCBOOK",
"LNCIPEDIA", "NONCODE", "NONCODE", "NONCODE", "NONCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE",
"NONCODE", "NONCODE", "NONCODE", "NONCODE", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "GENECARDS", "GENECARDS", "LNCBOOK", "LNCIPEDIA",
"NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE",
"LNCBOOK", "NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "GENECARDS",
"LNCIPEDIA", "ENA", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE"), RNA_type = c("lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA",
"snoRNA"), gene_name = c("ENSG00000250666.1", "ENSG00000281830.1",
"ENSG00000281377.1", "LINC01596", "ENSG00000242086.8", "ENSG00000280512.2",
"ENSG00000281603.2", "ENSG00000281060.2", "ENSG00000281794.2",
"ENSG00000281915.2", "ENSG00000280993.2", "ENSG00000282953.1",
"MUC20-OT1", "lnc-MUC20-67", "ENSG00000235273.1", "ENSG00000233950.1",
"ENSG00000230089.1", "ENSG00000225188.1", "LOC101929006", "HSALNG0049045",
"lnc-OR14J1-2", "NONHSAG043350.2", "NONHSAG045640.2", "NONHSAG045830.2",
"NONHSAG046018.2", "NONHSAG046538.2", "ENSG00000231860.1", "ENSG00000224328.1",
"ENSG00000236766.1", "ENSG00000224508.1", "ENSG00000236522.1",
"ENSG00000229681.1", "ENSG00000233883.1", "MDC1-AS1", "HSALNG0049184",
"NONHSAG043427.2", "NONHSAG045580.2", "NONHSAG045701.2", "NONHSAG045891.2",
"NONHSAG046074.2", "NONHSAG046228.2", "NONHSAG046589.2", "ENSG00000249981.1",
"ENSG00000276297.1", "ENSG00000280619.1", "AC145141.1", "LOC107987420",
"LOC107987434", "HSALNG0042531", "lnc-BDP1-1", "NONHSAG040656.2",
"ENSG00000242086.8", "ENSG00000280512.2", "ENSG00000281794.2",
"MUC20-OT1", "NONHSAG037073.2", "ENSG00000242086.8", "ENSG00000280512.2",
"ENSG00000281794.2", "ENSG00000281060.2", "ENSG00000282953.1",
"MUC20-OT1", "HSALNG0031832", "NONHSAG037073.2", "ENSG00000224835.1",
"ENSG00000227198.1", "ENSG00000233169.1", "ENSG00000225390.1",
"C6orf47-AS1", "HSALNG0049305", "NONHSAG043504.2", "NONHSAG046125.2",
"NONHSAG046270.2", "NONHSAG046461.2", "ENSG00000272566.1", "ENSG00000280590.1",
"ENSG00000280853.1", "ENSG00000281916.1", "AF250324.1", "ENSG00000272566",
"lnc-FRG2-13", "ACA38 snoRNA", "ENSG00000200816.1", "ENSG00000266847.1",
"ENSG00000263994.1", "ENSG00000264153.1", "ENSG00000263879.1",
"SNORA38")), row.names = c(NA, -88L), class = c("tbl_df", "tbl",
"data.frame"), spec = structure(list(cols = list(RNAcentral_id = structure(list(), class = c("collector_character",
"collector")), Database = structure(list(), class = c("collector_character",
"collector")), external_id = structure(list(), class = c("collector_character",
"collector")), NCBI_taxon_id = structure(list(), class = c("collector_double",
"collector")), RNA_type = structure(list(), class = c("collector_character",
"collector")), gene_name = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = "\t"), class = "col_spec"))
the one with the error:
my_data %>%
pivot_wider(names_from = Database, values_from = c(gene_name)) %>%
unnest()
my workaround try:
mynested_data <- my_data %>%
pivot_wider(names_from = Database, values_from = c(gene_name))
c("ENSEMBL", "GENCODE", "NONCODE", "ENA", "GENECARDS", "LNCBOOK",
"LNCIPEDIA") %>%
set_names(.) %>%
map(~ mynested_data %>%
unnest_wider(.x, names_sep = "_") %>%
unite(col = !!.x, vars(starts_with(!!quo(.x))), sep = ";"))
Error: Must subset columns with a valid subscript vector.
x Subscript has the wrong type `quosures`.
\u2139 It must be numeric or character.
Run `rlang::last_error()` to see where the error occurred.
In unite I tried also to use the col = .x
or col = !!quo(.x)
but I get the same error.
Edit1 What I expect to get as a result I'm doing all this in order to get a tibble that has per row(entry) one RNAcentral_id and the list "columns" made strings with several entries concatenated with a separator ";". ENSEMBL one column, GENCODE one column etc