1

I I can't figure out a way to make str_glue work across multiple pairs of columns without just typing out each pair (var1_n with var1_rate, var2_n with var2_rate, etc.).

I attempted to use across but I couldn't figure out the right syntax. I know there's an option in janitor/tabyl but I need to export this as a csv and I always have trouble with tabyl format playing nicely.

I tried to mimic the solution from Create multiple columns using pairs of different columns with map2 as well but I am having trouble with how to call columns and rename them in the same expression I think.

Example dataframe:

library(tidyverse)

example_data <-  structure(list(state = c("AK", "AL", "AR", "AZ", "CA", 
                                          "CO","FL", "GA", "HI", "IA", 
                                          "ID", "IL", "IN", "KS", "KY", 
                                          "LA", "MA","ME", "MI", "MN", 
                                          "MO", "MS", "MT", "NC", "ND", 
                                          "NE", "NH", "NM","NV", "NY", 
                                          "OH", "OK", "OR", "PA", "SC",
                                          "SD", "TN", "TX", "UT","VA", 
                                          "VT", "WA", "WI", "WV", "WY"), 
                                
                                n = c(13L, 5L, 28L, 15L, 35L, 32L, 10L, 30L, 9L, 
                                      82L, 27L, 52L, 34L, 82L, 27L, 25L, 3L, 16L, 
                                      36L, 77L, 33L, 30L, 49L, 20L, 36L, 63L, 13L, 
                                      11L,13L, 18L, 33L, 40L, 25L, 16L, 3L, 39L, 
                                      15L, 83L, 13L, 8L, 8L,39L, 58L, 21L, 16L), 
                                
                                var1_n = c(4L, 2L, 15L, 6L,14L, 14L, 6L, 22L,4L, 
                                           43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L,
                                           8L, 22L, 23L, 22L, 16L, 23L, 4L, 5L, 
                                           34L, 5L, 4L, 5L, 8L, 18L,24L, 7L, 5L, NA, 
                                           5L, 10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L), 
                                
                                var2_n = c(4L, 2L, 15L, 6L, 14L, 14L, 6L, 22L, 4L,
                                           43L, 14L, 31L, 10L, 42L, 16L, 11L, 2L, 
                                           8L, 22L, 23L, 22L, 16L,23L, 4L, 5L, 34L, 
                                           5L, 4L, 5L, 8L, 18L, 24L, 7L, 5L, NA, 5L, 
                                           10L, 40L, 7L, 4L, 2L, 17L, 36L, 8L, 6L), 
                                
                                var3_n = c(4L,2L, 5L, 5L, 8L, 7L, 4L, 8L, 2L, 
                                           15L, 9L, 11L, 2L, 12L, 4L, 6L,1L, 
                                           3L, 8L, 18L, 7L, 5L, 10L, 1L, 4L, 
                                           21L, 1L, 6L, 2L, 5L, 3L,6L, 3L, 1L, NA, 
                                           3L, NA, 16L, 1L, 1L, 2L, 8L, 8L, 2L, 6L), 
                                
                                var4_n = c(12L, 5L, 24L, 10L, 25L, 28L,9L, 26L, 7L, 
                                           73L, 20L, 50L, 33L, 66L, 25L, 21L, 3L,14L, 
                                           31L,70L, 31L, 25L, 36L, 15L, 23L, 48L, 9L, 
                                           10L, 8L, 16L, 30L, 28L,24L, 13L, 1L, 38L, 
                                           12L, 52L, 9L, 8L, 8L, 27L, 54L, 21L, 13L), 
                                
                                var1_rate = c("31%","40%", "54%", "40%", "40%", "44%", "60%", "73%", "44%", 
                                              "52%","52%", "60%", "29%", "51%", "59%", "44%", "67%", "50%", 
                                              "61%","30%", "67%", "53%", "47%", "20%", "14%", "54%", "38%", 
                                              "36%","38%", "44%", "55%", "60%", "28%", "31%", "NA%", "13%", 
                                              "67%","48%", "54%", "50%", "25%", "44%", "62%", "38%", "38%"), 
                                
                                var2_rate = c("31%", "40%", "54%", "40%", "40%", "44%","60%", "73%", "44%", 
                                              "52%", "52%", "60%", "29%", "51%", "59%","44%", "67%", "50%", 
                                              "61%", "30%", "67%", "53%", "47%", "20%","14%", "54%", "38%", 
                                              "36%", "38%", "44%", "55%", "60%", "28%","31%", "NA%", "13%", 
                                              "67%", "48%", "54%", "50%", "25%", "44%","62%", "38%", "38%"), 
                                
                                var3_rate = c("31%", "40%","18%", "33%", "23%", "22%", "40%", "27%", "22%", 
                                              "18%", "33%","21%", "6%", "15%", "15%", "24%", "33%", "19%", 
                                              "22%", "23%","21%", "17%", "20%", "5%", "11%", "33%", "8%", 
                                              "55%", "15%","28%", "9%", "15%", "12%", "6%", "NA%", "8%", 
                                              "NA%", "19%","8%", "13%", "25%", "21%", "14%", "10%", "38%"), 
                                
                                
                                var4_rate = c("92%","100%", "86%", "67%", "71%", "88%", "90%", "87%", "78%",
                                              "89%", "74%", "96%", "97%", "80%", "93%", "84%", "100%","88%", 
                                              "86%", "91%", "94%", "83%", "73%", "75%", "64%", "76%","69%", 
                                              "91%", "62%", "89%", "91%", "70%", "96%", "81%", "33%","97%", 
                                              "80%", "63%", "69%", "100%", "100%", "69%", "93%","100%", "81%")), 
                           
                           row.names = c(NA, -45L), class = "data.frame")

and then the code I used that worked:

pct_table <-  example_data %>%
  mutate(both_var1 = str_glue("{var1_n} ({var1_rate})"))

I am hoping to avoid typing the columns names manually. I am open to other easy ways if they will still play nicely with excel, besides str_glue.

M--
  • 25,431
  • 8
  • 61
  • 93
  • Welcome to Stack Overflow! Read how to provide a [minimal reproducible example of your code and data along with relevant errors](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) to get some ideas on how to make the question reproducible yet minimal (all things considered, this is a good first question; cheers). – M-- Aug 07 '23 at 18:43

2 Answers2

0

Perhaps something like this?

Here, I reshape the data long so that the various variable pairs are stacked instead of side by side. Then we can do the glue function once and reshape wide.

library(dplyr); library(tidyr)
example_data %>%
  rename("total_n" = n) %>%  # to avoid duplicate "n" columns
  pivot_longer(-c(1:2), names_sep = "_", names_to = c("Var", ".value")) %>%
  mutate(both = stringr::str_glue("{n} ({rate})")) %>%
  select(state, total_n, Var, both) |>
  pivot_wider(names_from = Var, values_from = both)


# A tibble: 45 × 6
   state total_n var1     var2     var3     var4    
   <chr>   <int> <glue>   <glue>   <glue>   <glue>  
 1 AK         13 4 (31%)  4 (31%)  4 (31%)  12 (92%)
 2 AL          5 2 (40%)  2 (40%)  2 (40%)  5 (100%)
 3 AR         28 15 (54%) 15 (54%) 5 (18%)  24 (86%)
 4 AZ         15 6 (40%)  6 (40%)  5 (33%)  10 (67%)
 5 CA         35 14 (40%) 14 (40%) 8 (23%)  25 (71%)
 6 CO         32 14 (44%) 14 (44%) 7 (22%)  28 (88%)
 7 FL         10 6 (60%)  6 (60%)  4 (40%)  9 (90%) 
 8 GA         30 22 (73%) 22 (73%) 8 (27%)  26 (87%)
 9 HI          9 4 (44%)  4 (44%)  2 (22%)  7 (78%) 
10 IA         82 43 (52%) 43 (52%) 15 (18%) 73 (89%)
# ℹ 35 more rows
# ℹ Use `print(n = ...)` to see more rows
Jon Spring
  • 55,165
  • 4
  • 35
  • 53
0

I think I might use Map() here:

i <- 1:4
pct_table <- example_data
pct_table[str_glue("both_var{i}")] <- Map(\(x, y) str_glue(
    "{example_data[[x]]} ({example_data[[y]]})"
), str_glue("var{i}_n"), str_glue("var{i}_rate"))


head(pct_table)
#  state  n var1_n var2_n var3_n var4_n var1_rate var2_rate var3_rate var4_rate both_var1 both_var2 both_var3 both_var4
# 1    AK 13      4      4      4     12       31%       31%       31%       92%   4 (31%)   4 (31%)   4 (31%)  12 (92%)
# 2    AL  5      2      2      2      5       40%       40%       40%      100%   2 (40%)   2 (40%)   2 (40%)  5 (100%)
# 3    AR 28     15     15      5     24       54%       54%       18%       86%  15 (54%)  15 (54%)   5 (18%)  24 (86%)
# 4    AZ 15      6      6      5     10       40%       40%       33%       67%   6 (40%)   6 (40%)   5 (33%)  10 (67%)
# 5    CA 35     14     14      8     25       40%       40%       23%       71%  14 (40%)  14 (40%)   8 (23%)  25 (71%)
# 6    CO 32     14     14      7     28       44%       44%       22%       88%  14 (44%)  14 (44%)   7 (22%)  28 (88%)

Incidentally, no need to use stringr::str_glue() here, this can be done straightforwardly in base R:

pct_table[sprintf("both_var%s", i)] <- Map(\(x, y) sprintf(
    "%s (%s)", example_data[[x]], {example_data[[y]]}
), sprintf("var%s_n", i), sprintf("var%s_rate", i))

Or if you prefer, with lapply():

n <- 1:4
pct_table <- example_data
pct_table[sprintf("both_var%s", n)] <- lapply(n, \(i) {
    x <- sprintf("var%s_n", i)
    y <- sprintf("var%s_rate", i)
    sprintf(
        "%s (%s)",
        example_data[[x]],
        {
            example_data[[y]]
        }
    )
})

If you don't want the source columns just do:

pct_table[c(sprintf("var%s_n", 1:4), sprintf("var%s_rate", 1:4))]  <- NULL
SamR
  • 8,826
  • 3
  • 11
  • 33