How to add multiple new columns with the same logic based on calculation of two columns with the same string in R

Question

I have a table shown below

data <- data.frame(A = c(1,2,3,4,5),
                   B = c(1,2,3,4,5),
                   C = c(11,12,13,14,15),
                   trans_A = c(NA,1,2,3,4),
                   trans_B = c(NA,1,2,3,4),
                   trans_C = c(NA,11,12,13,14))

I need to create/add multiple new columns to this dataframe with new name as difference_A, difference_B, etc. with the logic difference_A = A - trans_A, difference_B = B - trans_B, etc. How can I do this in R?

I tried this:

new_df[paste("difference", cols, sep = "_")]<- c(NA, new_df[cols]-new_df[paste("trans",cols,sep = "_")], na.rm=TRUE)

it gave me error message "duplicate subscripts for columns"

score 4 · Answer 1 · answered Feb 02 '21 at 16:29

One dplyr and purrr solution could be:

map(.x = names(select(data, -starts_with("trans_"))),
    ~ data %>%
     mutate(across(all_of(.x), .names = "difference_{col}") - across(all_of(paste0("trans_", .x))))) %>%
 reduce(full_join)

  A B  C trans_A trans_B trans_C difference_A difference_B difference_C
1 1 1 11      NA      NA      NA           NA           NA           NA
2 2 2 12       1       1      11            1            1            1
3 3 3 13       2       2      12            1            1            1
4 4 4 14       3       3      13            1            1            1
5 5 5 15       4       4      14            1            1            1

score 1 · Answer 2 · answered Feb 02 '21 at 23:09

We can use pivot_longer/pivot_wider

library(dplyr)
library(tidyr)
library(stringr)
data %>% 
    rename_at(vars(which(!str_detect(names(.), '_'))), ~ str_c("orig_", .)) %>% 
    mutate(rn = row_number()) %>% 
    pivot_longer(cols = -rn, names_to = c(".value", "group"), names_sep = "_") %>%
    transmute(group = str_c('difference_', group), rn, difference = orig - trans) %>%
    pivot_wider(names_from = group, values_from = difference ) %>% 
    select(-rn) %>% 
    bind_cols(data, .)
#  A B  C trans_A trans_B trans_C difference_A difference_B difference_C
#1 1 1 11      NA      NA      NA           NA           NA           NA
#2 2 2 12       1       1      11            1            1            1
#3 3 3 13       2       2      12            1            1            1
#4 4 4 14       3       3      13            1            1            1
#5 5 5 15       4       4      14            1            1            1

score 0 · Answer 3 · answered Feb 02 '21 at 23:17

A base R option

cbind(
  data,
  lapply(
    split.default(
      data,
      gsub(".*(\\w)$", "difference_\\1", names(data))
    ), function(v) do.call("-", v)
  )
)

gives

  A B  C trans_A trans_B trans_C difference_A difference_B difference_C
1 1 1 11      NA      NA      NA           NA           NA           NA
2 2 2 12       1       1      11            1            1            1
3 3 3 13       2       2      12            1            1            1
4 4 4 14       3       3      13            1            1            1
5 5 5 15       4       4      14            1            1            1

score 0 · Answer 4 · answered Feb 03 '21 at 10:46

Another proposition :

df <- read.table(header = TRUE, text = "
  A  B  C trans_A trans_B trans_C
1 1  1 11      NA      NA      NA
2 2  2 12       1       1      11
3 3  3 13       2       2      12
4 4  4 14       3       3      13
5 5  5 15       4       4      14
")

for (x in c("A","B","C")) {
  varname <- paste0("difference_",x)
  df[[varname]] <- with(df, get(x) - get(paste0("trans_",x)))
}

df
#>   A B  C trans_A trans_B trans_C difference_A difference_B difference_C
#> 1 1 1 11      NA      NA      NA           NA           NA           NA
#> 2 2 2 12       1       1      11            1            1            1
#> 3 3 3 13       2       2      12            1            1            1
#> 4 4 4 14       3       3      13            1            1            1
#> 5 5 5 15       4       4      14            1            1            1

# Created on 2021-02-03 by the reprex package (v0.3.0.9001

or

for (x in c("A","B","C")) {
  df <- within (df, {
    assign(paste0("difference_",x), get(x) - get(paste0("trans_",x)))
  })
}

df
#>   A B  C trans_A trans_B trans_C difference_A difference_B difference_C
#> 1 1 1 11      NA      NA      NA           NA           NA           NA
#> 2 2 2 12       1       1      11            1            1            1
#> 3 3 3 13       2       2      12            1            1            1
#> 4 4 4 14       3       3      13            1            1            1
#> 5 5 5 15       4       4      14            1            1            1

# Created on 2021-02-03 by the reprex package (v0.3.0.9001)

(Related to Use dynamic variable names in `dplyr`)

Regards,

How to add multiple new columns with the same logic based on calculation of two columns with the same string in R

4 Answers4