One option is to use the vectorised case_when()
function from the dplyr package (https://dplyr.tidyverse.org/reference/case_when.html), e.g.
library(dplyr)
df <- expand.grid(c(FALSE, TRUE, NA), c(FALSE, TRUE, NA)) %>%
.[rep(1:nrow(.), 50000), ] %>%
as.data.frame() %>%
setNames(., nm= c("a", "b"))
df$res <- df %>%
split(., 1:nrow(.)) %>%
lapply(., function(row_i){
ifelse(all(is.na(row_i)), NA,
ifelse(any(row_i, na.rm= TRUE), TRUE,
ifelse(any(!row_i, na.rm= TRUE), FALSE,
row_i)))
}) %>%
unlist()
current_output <- df
# load 'clean' example data
df <- expand.grid(c(FALSE, TRUE, NA), c(FALSE, TRUE, NA)) %>%
.[rep(1:nrow(.), 50000), ] %>%
as.data.frame() %>%
setNames(., nm= c("a", "b"))
case_when_output <- df %>%
mutate(res = case_when(if_any(everything(), ~.x == TRUE) ~ TRUE,
if_all(everything(), ~is.na(.x)) ~ NA,
TRUE ~ FALSE))
all.equal(current_output, case_when_output)
#> [1] TRUE
Created on 2023-07-18 with reprex v2.0.2
Benchmark (6yo core-i5 macbook pro; updated 20-07-2023):
library(dplyr)
# install.packages("purrrlyr")
library(purrrlyr)
library(microbenchmark)
library(ggplot2)
df <- expand.grid(c(FALSE, TRUE, NA), c(FALSE, TRUE, NA)) %>%
.[rep(1:nrow(.), 50000), ] %>%
as.data.frame() %>%
setNames(., nm= c("a", "b"))
ingo_pingo_func <- function(df) {
df$res <- df %>%
split(., 1:nrow(.)) %>%
lapply(., function(row_i){
ifelse(all(is.na(row_i)), NA,
ifelse(any(row_i, na.rm= TRUE), TRUE,
ifelse(any(!row_i, na.rm= TRUE), FALSE,
row_i)))
}) %>%
unlist()
}
jared_mamrot_func <- function(df) {
case_when_output <- df %>%
mutate(res = case_when(if_any(1:2, ~.x == TRUE) ~ TRUE,
if_all(1:2, ~is.na(.x)) ~ NA,
TRUE ~ FALSE))
}
darren_tsai_func <- function(df) {
df$result <- as.logical(do.call(\(...) pmax(..., na.rm = TRUE), df[1:2]))
}
roland_func <- function(df) {
cols <- 1:2
df$result <- Reduce(\(x, y) x | y, df[cols])
df[is.na(df$result), "result"] <- Reduce(\(x, y) ifelse(!is.na(x) | !is.na(y), FALSE, NA),
df[is.na(df$result), cols])
}
yuriy_saraykin_func <- function(df) {
whereNA <- rowSums(is.na(df)) == ncol(df)
df$expected <- rowSums(df, na.rm = TRUE) > 0
df$expected[whereNA] <- NA
}
efz_func <- function(df) {
output <- df %>% by_row(..f=function(row_i){
ifelse(all(is.na(row_i)), NA,
ifelse(any(row_i, na.rm= TRUE), TRUE,
ifelse(any(!row_i, na.rm= TRUE), FALSE,
row_i)))
}, .collate = 'rows')
}
TIC_func <- function(df) {
df$result <- rowSums(df, na.rm = TRUE) > 0 * NA^(rowMeans(is.na(df)) == 1)
}
result <- microbenchmark(ingo_pingo_func(df),
jared_mamrot_func(df),
darren_tsai_func(df),
roland_func(df),
yuriy_saraykin_func(df),
efz_func(df),
TIC_func(df),
times = 5)
result$expr <- forcats::fct_rev(forcats::fct_reorder(result$expr, result$time, mean))
autoplot(result)
