You can use lapply
and match
to match multiple columns based on lookup table.
cbind(results, lapply(results, \(x) lookup$id[match(x, lookup$price)]))
# price_1 price_2 price_1 price_2
#1 2 3 B C
#2 2 1 B A
#3 1 1 A A
And with the desired column names using setNames
and sub
.
cbind(results, lapply(setNames(results, sub("price", "id", names(results))),
\(x) lookup$id[match(x, lookup$price)]))
# price_1 price_2 id_1 id_2
#1 2 3 B C
#2 2 1 B A
#3 1 1 A A
Or using unlist
and format the result using a marix
.
cbind(results, matrix(lookup$id[match(unlist(results, FALSE, FALSE),
lookup$price)], nrow(results)))
# price_1 price_2 1 2
#1 2 3 B C
#2 2 1 B A
#3 1 1 A A
Or using fastmatch
.
library(fastmatch)
cbind(results, lapply(results, \(x) lookup$id[fmatch(x, lookup$price)]))
For other options have a look at fast R lookup table.
Benchmark
set.seed(0)
n <- 1e6
lookup <- data.frame(id = sample(LETTERS), price = sample(length(LETTERS)))
results <- data.frame(price_1 = sample(lookup$price, n, TRUE),
price_2 = sample(lookup$price, n, TRUE))
library(fastmatch)
library(dplyr)
bench::mark(check = FALSE,
"GKi Lapply" = cbind(results, lapply(results, \(x) lookup$id[match(x, lookup$price)])),
"GKi unlist" = cbind(results, matrix(lookup$id[match(unlist(results, FALSE, FALSE),
lookup$price)], nrow(results))),
"GKi fastmatch" = cbind(results, lapply(results, \(x) lookup$id[fmatch(x, lookup$price)])),
"Ronak Shah Base" = {new_df <- results
names(new_df) <- paste0("id", seq_along(new_df))
new_df[] <- lookup$id[match(unlist(new_df), lookup$price)]
cbind(results, new_df)},
"Ronak Shah dplyr" = {bind_cols(results, results %>% mutate_all(~lookup$id[match(., lookup$price)]))}
)
Result
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl>
1 GKi Lapply 14.39ms 20.73ms 31.9 30.6MB 19.9 16 10
2 GKi unlist 34.83ms 41.64ms 21.1 76.3MB 30.7 11 16
3 GKi fastmatch 10.98ms 12.14ms 63.5 22.9MB 21.8 32 11
4 Ronak Shah Base 1.09s 1.09s 0.917 176MB 4.58 1 5
5 Ronak Shah dplyr 48.31ms 55.1ms 17.8 35.6MB 11.8 9 6
In this case using lapply
with fastmatch
is the fastest of the compared methods.