0

I have written the below code and example data. However, the mutated variable returns all NAs.

I received the error "NAs introduced by coercion", so I included as.numeric() around eval(i_tlag_baseline), but this does not seem to fix the issue.

Also, class() of the cut variable in the df_i data frame shows as numeric when I enter the variables manually outside of the for loop and dplyr pipeline.

How can this be fixed?

Code

# Names of the data frames to process; each is looked up by name with get().
names.dfs <- c("df1", "df2", "df3")

# For each named data frame: build the names of the output column
# (<name>_t_210) and the input column (<name>_timediff), add the output column
# by binning with cut(), then write the result back under the original name.
for (i in names.dfs){

  df_i <- get(i)  
  
  # Name of the new column, as a character string (e.g. "df1_t_210").
  i_t_210 <- paste0(i,"_t_210")
  
  # Name of the source column, as a character string (e.g. "df1_timediff").
  i_tlag_baseline <- paste0(i,"_timediff")
  
  # NOTE(review): i_tlag_baseline holds a character string, and eval() of a
  # character string simply returns that string -- it does not look up the
  # column of that name. as.numeric() of the string is therefore NA (with the
  # "NAs introduced by coercion" warning), so cut() receives NA and the new
  # column is all NA. Referring to the column by its string name needs
  # something like .data[[i_tlag_baseline]] instead.
  df_i <- df_i %>%
          mutate({{i_t_210}} := cut(as.numeric(eval(i_tlag_baseline)), breaks = c(-2,0,10,22,34,46,58,70), labels = c("baseline","Timepoint0.5", "Timepoint1", "Timepoint2", "Timepoint3","Timepoint4","Timepoint5")))
  
  # Overwrite the original object (df1, df2, ...) with the modified copy.
  assign(paste0(i), df_i)
  
}

EXAMPLE DATA

# Example data (dput() output): a 6-row grouped tibble, grouped by ResultsID,
# with columns ResultsID, RepeatNo, Submitted_df1 (Date) and df1_timediff
# (numeric; the column the loop bins with cut()).
df1 <- structure(list(ResultsID = c(4, 4, 3, 3, 1, 1), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df1 = structure(c(17484, 17484, 
17488, 17497, 17502, 17509), class = "Date"), df1_timediff = c(0, 
0, 0, 0.295687885010267, 0, 0.229979466119097)), row.names = c(NA, 
-6L), groups = structure(list(ResultsID = c(1, 3, 4), .rows = structure(list(
    5:6, 3:4, 1:2), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

# Example data (dput() output): a 6-row grouped tibble, grouped by ResultsID,
# with columns ResultsID, RepeatNo, Submitted_df2 (Date) and df2_timediff
# (numeric; the column the loop bins with cut()).
df2 <- structure(list(ResultsID = c(1, 5, 3, 1, 2, 4), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df2 = structure(c(16856, 16858, 
16861, 16869, 16875, 16888), class = "Date"), df2_timediff = c(0, 
0, 0, 0.427104722792608, 0, 0)), row.names = c(NA, -6L), groups = structure(list(
    ResultsID = c(1, 2, 3, 4, 5), .rows = structure(list(c(1L, 
    4L), 5L, 3L, 6L, 2L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))
  
# Example data (dput() output): a 6-row grouped tibble, grouped by ResultsID,
# with columns ResultsID, RepeatNo, Submitted_df3 (Date) and df3_timediff
# (numeric; the column the loop bins with cut()).
df3 <-  structure(list(ResultsID = c(4, 1, 3, 4, 3, 2), RepeatNo = c(0L, 
0L, 0L, 0L, 0L, 0L), Submitted_df3 = structure(c(17912, 17913, 
17915, 17916, 17919, 17921), class = "Date"), df3_timediff = c(0, 
0, 0, 0.131416837782341, 0.131416837782341, 0)), row.names = c(NA, 
-6L), groups = structure(list(ResultsID = c(1, 2, 3, 4), .rows = structure(list(
    2L, 6L, c(3L, 5L), c(1L, 4L)), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -4L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame")) 
Martin Gal
  • 16,640
  • 5
  • 21
  • 39
Aepkr
  • 101
  • 8

1 Answers1

1

This should work:

# Names of the data frames to process; each must already exist in the
# environment the loop runs in, because it is fetched with get().
names.dfs <- c("df1", "df2", "df3")

# For each data frame, bin its `<name>_timediff` column into labelled
# timepoints, store the result as a factor in a new `<name>_t_210` column, and
# write the modified data frame back under its original name.
for (i in names.dfs) {

  df_i <- get(i)

  # Both column names are derived from the data frame's name, so they are only
  # available as character strings, not as bare symbols.
  i_t_210 <- paste0(i, "_t_210")
  i_tlag_baseline <- paste0(i, "_timediff")

  df_i <- df_i %>%
    mutate(
      # "{i_t_210}" := names the new column from the string held in i_t_210.
      # .data[[i_tlag_baseline]] fetches the existing column by its string
      # name; it replaces the deprecated UQ(rlang::sym(...)) form.
      "{i_t_210}" := cut(
        as.numeric(.data[[i_tlag_baseline]]),
        # cut() intervals are right-closed by default: (-2, 0], (0, 10], ...
        breaks = c(-2, 0, 10, 22, 34, 46, 58, 70),
        labels = c(
          "baseline", "Timepoint0.5", "Timepoint1", "Timepoint2",
          "Timepoint3", "Timepoint4", "Timepoint5"
        )
      )
    )

  # Overwrite the original object (df1, df2, ...) with the modified copy.
  assign(i, df_i)

}

I have found this answer quite useful in these situations.

DaveArmstrong
  • 18,377
  • 2
  • 13
  • 25