1

I have this df:

>df
    author author_id other_authors other_authors_id
        A      123       D, E ,F       011 , 021, 003
        B      122       G             111
        C      121       H, F          101, 003

the last two columns have values stored as list. I would like to make it from wide to long, but i'm not sure what's the best way to do it. I'm trying to create a network graph from it.

I want to gather them so they look like this:

author other_autors author_id other_autors_id
A      D            123       011
A      E            123       021
A      F            123       003
B      G            122       111
C      H            121       101
C      F            121       003

any ideas how to do it? I've managed to do this, but it only works if the values are not lists.

gather(df, key="author", value="other_authors", -author)
Lucca Ramalho
  • 573
  • 1
  • 6
  • 16
  • We can use `library(splitstackshape);cSplit(df, c("other_authors", "other_authors_id"), ",\\s*", "long")` – akrun Apr 25 '19 at 19:17

2 Answers2

0

We can use cSplit from splitstackshape for multiple column splits

library(splitstackshape)
cSplit(df, c("other_authors", "other_authors_id"), ", ", "long",
       fixed = FALSE, type.convert = FALSE)
#    author author_id other_authors other_authors_id
#1:      A       123             D              011
#2:      A       123             E              021
#3:      A       123             F              003
#4:      B       122             G              111
#5:      C       121             H              101
#6:      C       121             F              003

Or using separate_rows from tidyr

library(tidyverse)
df %>%
   separate_rows(other_authors, other_authors_id)
#   author author_id other_authors other_authors_id
#1      A       123             D              011
#2      A       123             E              021
#3      A       123             F              003
#4      B       122             G              111
#5      C       121             H              101
#6      C       121             F              003

Update

If the columns 'other_authors', 'others_authors_id' are list columns, then we can use unnest

df1 %>%
       unnest
#  author author_id other_authors other_authors_id
#1      A       123             D              011
#2      A       123             E              021
#3      A       123             F              003
#4      B       122             G              111
#5      C       121             H              101
#6      C       121             F              003

data

df <- structure(list(author = c("A", "B", "C"), author_id = 123:121, 
other_authors = c("D, E ,F", "G", "H, F"), other_authors_id = c("011 , 021, 003", 
"111", "101, 003")), class = "data.frame", row.names = c(NA, 
 -3L))

df1 <- structure(list(author = c("A", "B", "C"), author_id = 123:121, 
other_authors = list(c("D", "E", "F"), "G", c("H", "F")), 
other_authors_id = list(c("011", "021", "003"), "111", c("101", 
"003"))), row.names = c(NA, -3L), class = "data.frame")
akrun
  • 874,273
  • 37
  • 540
  • 662
0

Please provide a reproducible example next time.

library(tidyverse)  
df <- tribble(~author,~author_id, ~other_authors,~other_authors_id,
        "A",     123,       list("D", "E" ,"F"),    list(011 , 021, 003),
        "B",     122,       list("G")            ,    list(111),
        "C",     121,       list("H","F")     ,    list(101, 003))

df %>% 
  unnest(other_authors,other_authors_id) %>% 
  mutate(other_authors = flatten_chr(other_authors),
         other_authors_id = flatten_dbl(other_authors_id))
Bruno
  • 4,109
  • 1
  • 9
  • 27