3

I have an input data structure whose leaf nodes end up being empty lists, while the parent lists contain only sublists. The relevant info ends up in the list attributes, and I have to extract that. To that end, I need to get the index positions of all list elements and use those coordinates to walk the list. {rrapply} has been working for lists with elements, but I cannot figure out how to get the positions of lists with no non-list sub-elements.

Below is a reprex showing the positions I can get {rrapply} to return and the ones it misses, followed by a very old-school solution that I coded up and that works. So I have a solution, but I would like to see whether I can get {rrapply} to do this, and to understand why I am not able to get it to do what I want.

# stackoverflow_list-element-position-vector.R

library(rrapply)
library(purrr)

# Example input: an empty list, a list of three empty lists, and a list
# holding a named integer vector `a` next to an empty list.
lst <- list(
    list(),
    list(list(), list(), list()),
    list(a = 1:3, list())
)

# Melting with the default settings only reports the non-list leaf `a`
# (position 3, 1); none of the empty lists show up in the result.
rrapply(
    lst,
    f = function(x, .xpos) .xpos,
    how = "melt"
) |>
    pluck("value") |>
    str()
#> List of 1
#>  $ : int [1:2] 3 1

#;; From: https://stackoverflow.com/a/76124073/1022967
#;; First pass: replace every zero-length list with NA.  Second pass: melt
#;; the result and collect the position of each leaf.
#;; The intermediate nodes (the lists at positions 2 and 3) are still
#;; missing from the output, and I need those too.
rrapply(
    lst,
    classes = "list",
    condition = function(x) length(x) == 0,
    f = function(x) NA,
    how = "recurse"
) |>
    rrapply(
        f = function(x, .xpos) .xpos,
        how = "melt"
    ) |>
    pluck("value") |>
    str()
#> List of 6
#>  $ : int 1
#>  $ : int [1:2] 2 1
#>  $ : int [1:2] 2 2
#>  $ : int [1:2] 2 3
#>  $ : int [1:2] 3 1
#>  $ : int [1:2] 3 2

#;; Collect the position (index) vector of every element of a nested list.
#;;
#;; The list is walked breadth-first with an explicit work queue, so all
#;; elements at depth 1 are reported first, then those at depth 2, and so on.
#;; Both leaves and intermediate lists are reported; only plain lists are
#;; descended into.
#;;
#;; @param llst A (possibly nested) list.  A non-list or an empty list yields
#;;   an empty result.
#;; @return An unnamed list of integer vectors; each vector is the sequence
#;;   of `[[` indices that leads from `llst` to one element.
listEltPos <- function(llst) {
    # FIFO queue of nodes still to expand.  Each entry pairs a node (`comp`)
    # with the index path that reaches it (`coord_vec`).
    work_lst <- list(list(coord_vec = integer(0), comp = llst))
    output_lst <- list()

    while (length(work_lst) > 0) {
        # Pop the head of the queue.
        e <- work_lst[[1]]
        work_lst <- work_lst[-1]

        # Only non-empty lists have children to report.  inherits() is used
        # instead of `class(x) != "list"` because class() may return several
        # values (e.g. c("matrix", "array")), which makes `if` fail with a
        # "condition has length > 1" error.
        if (!inherits(e$comp, "list") || length(e$comp) == 0) next

        # Pair every child with its full position vector.
        children <- lapply(
            seq_along(e$comp),
            \(i) list(coord_vec = c(e$coord_vec, i), comp = e$comp[[i]])
        )

        # Report the children's positions, then queue the children so that
        # their own sub-elements are visited later.
        output_lst <- c(output_lst, lapply(children, \(ch) ch$coord_vec))
        work_lst <- c(work_lst, children)
    }

    output_lst
}

#;; Desired result: the position of every node, intermediate lists as well
#;; as leaves.
str(listEltPos(lst))
#> List of 8
#>  $ : int 1
#>  $ : int 2
#>  $ : int 3
#>  $ : int [1:2] 2 1
#>  $ : int [1:2] 2 2
#>  $ : int [1:2] 2 3
#>  $ : int [1:2] 3 1
#>  $ : int [1:2] 3 2

Created on 2023-05-03 with reprex v2.0.2

mpettis
  • 3,222
  • 4
  • 28
  • 35

2 Answers

1

A possible approach using rrapply() could be to walk the nested list with how = "recurse" and keep track of all encountered values of .xpos by assigning them to a global variable (outside the function scope):

library(rrapply)

## accumulator for every position that gets visited
allpos <- list()

## apply `f` to list nodes and integer nodes alike; `f` records the
## node's position in the global `allpos` and hands the node back unchanged
invisible(rrapply(
  lst,
  classes = c("list", "integer"),
  how = "recurse",
  f = function(x, .xpos) {
    allpos <<- c(allpos, list(.xpos))
    x
  }
))

str(allpos)
#> List of 8
#>  $ : int 1
#>  $ : int 2
#>  $ : int [1:2] 2 1
#>  $ : int [1:2] 2 2
#>  $ : int [1:2] 2 3
#>  $ : int 3
#>  $ : int [1:2] 3 1
#>  $ : int [1:2] 3 2

Note: this will not be efficient for very large lists, in which case it may be better to determine the size of allpos in advance instead of iteratively appending new values to the list.

Joris C.
  • 5,721
  • 3
  • 12
  • 27
  • 1
    Thanks! I'm going to see if I can wrap this in a function so that I can update `allpos` in the function body environment and return that, and not pollute the `.GlobalEnv`. But that's a separate question and one that I think will be straightforward to figure out. – mpettis May 04 '23 at 14:13
1

The accepted answer is exactly what I want. I am adding here the way I wrapped that solution in a function so that it does not modify the .GlobalEnv. I also changed one entry of the classes argument from "integer" to "ANY", because my real data may contain more than integers (which I didn't lay out in the original example).

# Sample list with slightly different content: besides the empty lists it
# holds an integer element (`a`) and a character element (`c`).
lst <- list(
    list(),
    list(list(), list()),
    list(a = 1:3, list(), c = letters[1:4])
)


# Adapted from Joris' answer.  Changes: the positions are accumulated in
# this function's own environment instead of the global environment, and
# `classes` uses "ANY" instead of "integer".
#
# @param llst The nested list to walk.
# @return A list with one `.xpos` value per invocation of the callback, in
#   the order rrapply() invokes it.
getListNodePos <- function(llst) {
    #;; https://stackoverflow.com/questions/8771942/how-can-i-reference-the-local-environment-within-a-function-in-r 
    # Environment of this call; the callback below writes `allpos` into it.
    locEnv <- environment()

    ## initialize empty list
    allpos <- list()

    ## walk nested list and append indices to `allpos`
    # rrapply is called through its namespace so that this function does not
    # attach the package (and alter the search path) as a side effect.  The
    # return value of the walk is not needed, only the recorded positions.
    rrapply::rrapply(
        llst,
        classes = c("list", "ANY"),
        how = "recurse",
        f = \(x, .xpos) {
            assign("allpos", append(allpos, list(.xpos)), envir = locEnv)
            x
        }
    )

    allpos
}

# Print the structure of the collected position vectors
str(getListNodePos(lst))
#> List of 8
#>  $ : int 1
#>  $ : int 2
#>  $ : int [1:2] 2 1
#>  $ : int [1:2] 2 2
#>  $ : int 3
#>  $ : int [1:2] 3 1
#>  $ : int [1:2] 3 2
#>  $ : int [1:2] 3 3

Created on 2023-05-04 with reprex v2.0.2

mpettis
  • 3,222
  • 4
  • 28
  • 35