One approach is to use `rrapply()` in the rrapply-package (an extension of base `rapply()`):
## Demo: recursively drop NULL elements from a nested list.
## `condition = Negate(is.null)` keeps only non-NULL leaves, and
## `how = "prune"` removes the non-matching nodes from the result
## instead of replacing them (note c$cc and d are gone below).
library(rrapply)
x <- list(a=1, b=2, c=list(ca=1, cb=2, cc=NULL), d=NULL)
rrapply(x, condition = Negate(is.null), how = "prune")
#> $a
#> [1] 1
#>
#> $b
#> [1] 2
#>
#> $c
#> $c$ca
#> [1] 1
#>
#> $c$cb
#> [1] 2
Benchmark timings

Benchmarking the computation time of `rrapply()` against rlist's `list.clean()` function for some large nested lists, I get the following results:
## Recursively build a nested list with dmax layers; every leaf node is
## list(1, NULL), so roughly 50% of the leaf elements are NULL.
##
## len  : number of children at each internal node
## d    : current depth (call with d = 1 at the top level)
## dmax : total number of layers in the generated list
f <- function(len, d, dmax) {
  # Choose the child generator once per level: recurse while below the
  # maximum depth, otherwise emit a leaf containing one value and one NULL.
  make_child <- if (d + 1 < dmax) {
    function(i) f(len, d + 1, dmax)
  } else {
    function(i) list(1, NULL)
  }
  lapply(seq_len(len), make_child)
}
## long shallow list (3 layers, total 5e5 nodes)
x_long <- f(len = 500, d = 1, dmax = 3)
## check = "equal" verifies both expressions return identical pruned lists,
## so the timings compare equivalent work; times = 5L keeps the run short.
microbenchmark::microbenchmark(
  rlist = rlist::list.clean(x_long, recursive = TRUE),
  rrapply = rrapply::rrapply(x_long, condition = Negate(is.null), how = "prune"),
  check = "equal",
  times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> rlist 2331.4914 2343.3001 2438.9674 2441.3850 2512.3484 2566.3121 5
#> rrapply 353.7169 393.0646 400.8198 399.7971 417.7235 439.7972 5
## deeply nested list (18 layers, total 2^18 nodes)
x_deep <- f(len = 2, d = 1, dmax = 18)
## Same comparison as above but stressing recursion depth rather than
## list width; rrapply remains roughly an order of magnitude faster.
microbenchmark::microbenchmark(
  rlist = rlist::list.clean(x_deep, recursive = TRUE),
  rrapply = rrapply::rrapply(x_deep, condition = Negate(is.null), how = "prune"),
  check = "equal",
  times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> rlist 2167.2946 2251.5203 2279.9963 2292.5045 2332.4432 2356.2188 5
#> rrapply 268.9463 274.7437 325.9585 292.4559 354.1607 439.4857 5