
I have a data set as shown below and I am struggling with how to unnest/unchop the lists in each observation. How would I unnest each list in each row so that the data ends up with one observation per row? I have tried unnest() and specified the column to unnest, but nothing changes.

I am new to working with lists and not sure how to proceed.

I believe it is similar to this example.

Data

data <- structure(list(date = c("2023-02-07 17:10:30.622999", "2023-02-07 17:10:30.819000", 
"2023-02-07 17:10:30.822999", "2023-02-07 17:10:31.598000", "2023-02-07 17:10:31.815000", 
"2023-02-07 17:10:32.676000"), label = c("location", "region", 
"site", "region", "site", "site"), value = c("list(\"brazil\", \"panama\", \"usa\")", 
"list()", "list()", "list(\"santa_virginia\", \"el_cope\", \"santa_fe\", \"fortuna\", \"altos_de_campana\", \"el_valle\", \"caribbean\", \"boraceia\", \"pennsylvania\", \"tennessee\", \"vermont\", \"new_mexico\", \"louisiana\", \"california\")", 
"list()", "list(\"lago_sede_water\", \"rio_marta\", \"4_land\", \"sophia_stream\", \"2_water\", \"2_land\", \"6_water\", \"medina\", \"6_land\", \"lago_angelim_water\", \"5_land\", \"altos_de_piedra\", \"5_water\", \"4_water\", \"8_water\", \"8_land\", \"3_land\", \"3_water\", \"7_land\", \"7_water\", \"alleman\", \"campana_loop\", \"cerro_negro\", \"guabal\", \"fortuna_loop\", \"hotel_campestre\", \"jordinal\", \"lago_minutal_water\", \"mata_ahogado\", \"omar_torrijos\", \"rabbit_stream\", \"rio_blanco\", \"1_land\", \"rio_maria\", \"rio_tigrero\", \"sargentita\", \"sora\", \"poça_temporária_water\", \n    \"trilha_land\", \"11_water\", \"1_water\", \"9_water\", \"lago_anta_water\", \"9_land\", \"b3w\", \"b3t\", \"b4t\", \"b5\", \"olho_de_agua\", \"a4w\", \"pirapitinga\", \"a3w\", \"a2w\", \"estrada\", \"alojamento\", \"admin_pond\", \"rv_pond\", \"tuttle_pond\", \"tryon_weber\", \"vorisek_pond\", \"wood_lab_pond\", \"phelps_pond\", \"beaver\", \"cow_pit\", \"black_jack\", \"david's_pond\", \"church\", \"hatchery\", \"newt\", \"west\", \"sink\", \"pine\", \"deer\", \"camp_johnson_pond_1\", \"red_tank\", \"camp_johnson_pond_2\", \"natural_area_close_to_vt01_and_vt02\", \"adjacent_to_sand_bar_state_park\", \n    \"camp_ethan_allen_pond\", \"north_beach_pond\", \"shelbourne_bay\", \"shelbourne_pond\", \"kisatchie_bayou\", \"horse_head_ephemeral\", \"solomon_lane\", \"kurthwood\", \"horse_head_permanent\", \"garcia_well\", \"johnson_tank\", \"davie's_playa__doug_burkett\", \"cuchillo\", \"rouse_tank\", \"avilas\", \"rhodes_spring\", \"davie's_playa\", \"circle_7\", \"red_tank_2\", \"artesia\", \"paseo_del_rio\", \"50783\", \"72808\", \"11858\", \"84235\", \"70550\", \"12590\", \"84255\", \"84313\", \"84226\", \"82682\", \"84218\", \"74976\", \"10100\", \"72336\", \"10475\", \n    \"72996\", \"11008\", \"70556\", \"10486\", \"70327\", \"50785\", \"74281\", \"72973\", \"70449\", \"12618\", \"10102\", \"10101\", \"21520\", \"22008\", \"20135\", \"84237\", \"84325\", \"12621\", \"50839\", \"70641\", \"70505\", \"70175\", \"70413\", \"70370\", \"10421\", \"70279\", \"20198\", \"20196\", \"10422\", \"10319\", \"11215\", \"72093\", \"10225\", \"50837\", \"11009\", \"10223\", \"70470\", \"20231\", \"70114\", \"10220\", \"20170\", \"72442\", \"50787\", \"10206\", \"70284\", \"11469\", \"70481\", \"20199\", \"20169\", \"52003\", \"72695\", \"10488\", \"50218\", \"54188\", \"70628\", \"54204\", \n    \"50887\", \"50897\", \"50899\", \"10593\", \"10055\", \"10090\", \"21081\", \"21111\", \"21108\", \"21110\", \"10037\", \"70571\", \"70611\", \"10474\", \"11030\", \"10477\", \"11029\", \"10489\", \"52127\", \"11010\", \"52171\", \"52193\", \"52234\", \"52238\", \"70567\", \"11040\", \"73037\", \"84221\", \"13025\", \"50162\", \"50731\", \"70403\", \"72008\", \"70619\", \"10490\", \"10487\", \"10476\", \"12110\", \"12111\", \"52271\", \"52212\", \"52244\", \"50307\", \"50306\", \"54203\", \"51357\", \"50305\", \"50858\", \"50138\", \"10697\", \"10461\", \"22019\", \"50217\", \"50304\", \"50929\", \"72989\", \n    \"52253\", \"52254\", \"52255\", \"52256\", \"52257\", \"52258\", \"52259\", \"52260\", \"52261\", \"52262\", \"52263\", \"52264\", \"52266\", \"52267\", \"52268\", \"53003\", \"10032\", \"10315\", \"10316\", \"52240\", \"52241\", \"52245\", \"52248\", \"52249\", \"11027\", \"52250\", \"52251\", \"52252\", \"54200\", \"70295\", \"70331\", \"84326\", \"84327\", \"84329\", \"70525\", \"51356\", \"11506\", \"21109\", \"10572\", \"10570\", \"12464\", \"10008\", \"51365\", \"51801\", \"54202\", 
\"70251\", \"70348\", \"70629\", \"10196\", \"50895\", \"10198\", \"50904\", \"50967\", \"52246\", \"52247\", \"54035\")"
)), row.names = c(NA, -6L), class = "data.frame")

if (!require(librarian)){
  install.packages("librarian")
  library(librarian)
}

# librarian downloads, if not already downloaded, and reads in needed packages

librarian::shelf(tidyverse, here, janitor, purrr, learnitdown, tidyr)

un_nested <- data %>% unnest(value) # no luck

un_nested <- data %>% unchop(value) # no luck either
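
For what it's worth, the column seems to be plain character rather than a list-column, which might be why unnest()/unchop() have nothing to expand:

class(data$value)
# [1] "character"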


Eizy

2 Answers


If you don't actually have a list-column and it's a character column, as in your example, you can use a new function from tidyr, separate_longer_delim(). It was added in tidyr 1.3.0, so run install.packages('tidyr') if you haven't updated recently. You'll still need to parse the value column to clean it up a bit (see the sketch after the output below), but this gets you mostly there.

data %>% 
  separate_longer_delim(value, ',')

                          date    label                                  value
1   2023-02-07 17:10:30.622999 location                          list("brazil"
2   2023-02-07 17:10:30.622999 location                               "panama"
3   2023-02-07 17:10:30.622999 location                                 "usa")
4   2023-02-07 17:10:30.819000   region                                 list()
5   2023-02-07 17:10:30.822999     site                                 list()
6   2023-02-07 17:10:31.598000   region                  list("santa_virginia"
7   2023-02-07 17:10:31.598000   region                              "el_cope"
8   2023-02-07 17:10:31.598000   region                             "santa_fe"
9   2023-02-07 17:10:31.598000   region                              "fortuna"
10  2023-02-07 17:10:31.598000   region                     "altos_de_campana"
11  2023-02-07 17:10:31.598000   region                             "el_valle"
12  2023-02-07 17:10:31.598000   region                            "caribbean"
13  2023-02-07 17:10:31.598000   region                             "boraceia"
14  2023-02-07 17:10:31.598000   region                         "pennsylvania"
15  2023-02-07 17:10:31.598000   region                            "tennessee"
16  2023-02-07 17:10:31.598000   region                              "vermont"
17  2023-02-07 17:10:31.598000   region                           "new_mexico"
18  2023-02-07 17:10:31.598000   region                            "louisiana"
19  2023-02-07 17:10:31.598000   region                          "california")
20  2023-02-07 17:10:31.815000     site                                 list()
21  2023-02-07 17:10:32.676000     site                 list("lago_sede_water"
22  2023-02-07 17:10:32.676000     site                            "rio_marta"
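
For the remaining cleanup, one possible approach (a sketch, untested on the full data) is to split on ", ", strip the list( wrapper, the closing parenthesis, and the quote marks with stringr, and drop the rows that were empty list() calls:

library(tidyr)
library(dplyr)
library(stringr)

data %>% 
  separate_longer_delim(value, ", ") %>% 
  # strip the list( wrapper, the closing ), and the quote marks
  mutate(value = str_squish(str_remove_all(value, 'list\\(|\\)|"'))) %>% 
  # rows that were list() are now empty strings, so drop them
  filter(value != "")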
Matt
data %>%
  # each value is literally R code like list("brazil", ...): parse it with
  # str2lang(), eval() it to a list, then unlist() to a character vector
  mutate(value = map(value, ~ unlist(eval(str2lang(.x))))) %>%
  unnest(value)
# A tibble: 299 × 3
   date                       label    value           
   <chr>                      <chr>    <chr>           
 1 2023-02-07 17:10:30.622999 location brazil          
 2 2023-02-07 17:10:30.622999 location panama          
 3 2023-02-07 17:10:30.622999 location usa             
 4 2023-02-07 17:10:31.598000 region   santa_virginia  
 5 2023-02-07 17:10:31.598000 region   el_cope         
 6 2023-02-07 17:10:31.598000 region   santa_fe        
 7 2023-02-07 17:10:31.598000 region   fortuna         
 8 2023-02-07 17:10:31.598000 region   altos_de_campana
 9 2023-02-07 17:10:31.598000 region   el_valle        
10 2023-02-07 17:10:31.598000 region   caribbean       
# … with 289 more rows
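
If you want to keep the rows whose value was an empty list() (they become NULL and are silently dropped by unnest() above), one option is unnest()'s keep_empty argument:

data %>%
  mutate(value = map(value, ~ unlist(eval(str2lang(.x))))) %>%
  unnest(value, keep_empty = TRUE) # the three list() rows come back as NA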
Onyambu