0

I have created a data frame containing the mean and SD, or the median and quartiles, for a number of variables. I want to combine the mean and SD columns for each variable into a single column for that variable, and likewise combine the median and quartile columns for the variables summarised that way.

This is the data:

# Literal copy of the summary tibble `dvbaseline` (5 rows, one per stage).
# `stage` and `N` are ordinary columns. `mean`, `sd`, `median`, `low` and
# `high` are each a nested tibble stored as a single column:
#   - `mean`, `sd`           : fmd_perc, gtn_perc, average_imt
#   - `median`, `low`, `high`: ach_perc, snp_perc
# The values inside `low` and `high` carry `25%` / `75%` element names.
dvbaseline = structure(list(stage = c("Control", "CKD1-2", "CKD3a", "CKD3b", 
"CKD4-5"), N = c(12L, 18L, 11L, 20L, 22L), mean = structure(list(
    fmd_perc = c(9, 6.8, 11.8, 5.2, 7.5), gtn_perc = c(23.7, 
    16.5, 13.9, 15.2, 14.1), average_imt = c(0.6, 0.5, 0.7, 0.7, 
    0.7)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", 
"data.frame")), sd = structure(list(fmd_perc = c(6.6, 9.6, 11.6, 
7, 6.4), gtn_perc = c(13.1, 5.8, 5.9, 9.6, 7.8), average_imt = c(0.1, 
0.1, 0.2, 0.2, 0.2)), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame")), median = structure(list(ach_perc = c(124.4, 
182.1, 75.6, 138.1, 121.2), snp_perc = c(99.7, 160.4, 23.3, 64.1, 
120)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
)), low = structure(list(ach_perc = c(`25%` = 59.7, `25%` = 64.2, 
`25%` = 34, `25%` = 19.5, `25%` = 67.7), snp_perc = c(`25%` = 38.4, 
`25%` = 106.4, `25%` = 19.1, `25%` = 35.5, `25%` = 78.7)), row.names = c(NA, 
-5L), class = c("tbl_df", "tbl", "data.frame")), high = structure(list(
    ach_perc = c(`75%` = 372.1, `75%` = 309.8, `75%` = 213.6, 
    `75%` = 308, `75%` = 221.7), snp_perc = c(`75%` = 236.3, 
    `75%` = 240.8, `75%` = 74.5, `75%` = 223.8, `75%` = 215.2
    )), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-5L))

This was created as follows:


# Per-stage summary of the outcome variables in `pav_dv`.
#
# Each `across()` call is assigned to a single name inside `summarise()`, so
# its result is kept as ONE nested data-frame column (`mean`, `sd`, `median`,
# `low`, `high`) holding one sub-column per selected variable. That is why
# `colnames()` reports only seven names while printing shows `mean$fmd_perc`
# and so on.
#
# `na.rm = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
dvbaseline <- pav_dv %>%
  group_by(stage) %>%
  summarise(
    # Number of rows in each stage.
    N = n(),
    # Mean and SD, rounded to 1 decimal place.
    mean = across(
      c("fmd_perc", "gtn_perc", "average_imt"),
      ~ round(mean(., na.rm = TRUE), 1)
    ),
    sd = across(
      c("fmd_perc", "gtn_perc", "average_imt"),
      ~ round(sd(., na.rm = TRUE), 1)
    ),
    # Median with lower (25%) and upper (75%) quartiles, rounded to 1 decimal
    # place. `quantile()` returns a named value, so the `25%` / `75%` names
    # are carried into the `low` / `high` sub-columns.
    median = across(
      c("ach_perc", "snp_perc"),
      ~ round(median(., na.rm = TRUE), 1)
    ),
    low = across(
      c("ach_perc", "snp_perc"),
      ~ round(quantile(., na.rm = TRUE, probs = 0.25), 1)
    ),
    high = across(
      c("ach_perc", "snp_perc"),
      ~ round(quantile(., na.rm = TRUE, probs = 0.75), 1)
    )
  ) %>%
  # Sort rows in the order given by `levels` rather than alphabetically.
  arrange(
    factor(stage, levels = c("Control", "CKD1-2", "CKD3a", "CKD3b", "CKD4-5"))
  )

Interestingly, when I run colnames(dvbaseline) I get "stage" "N" "mean" "sd" "median" "low" "high". However, when I print or view the data frame, it shows individual columns such as mean$fmd_perc.

I was going to use paste0 to combine the mean and SD, or the median and quartiles. The output should look like this:

stage    N    fmd_perc       ach_perc
Control  12   9 (6.6)   124 (59.7,372)

etc.

A sample of the original data:

# Take the trailing rows of the raw data (`tail()` default) and keep only the
# grouping column plus the five outcome variables.
original <- pav_dv %>%
  tail() %>%
  select(stage, fmd_perc, gtn_perc, ach_perc, snp_perc, average_imt)

# Emit a copy-pasteable representation of the sample.
dput(original)

structure(list(stage = c("CKD3a", "Control", "CKD3b", "Control", 
"Control", "Control"), fmd_perc = c(17.0068027210884, 4.90196078431372, 
13.2963988919668, 17.4515235457064, 6.28930817610062, 9.88372093023255
), gtn_perc = c(15.3318077803204, 45.2471482889734, 19.1135734072022, 
39.8148148148148, 27.1844660194175, 25.9587020648967), ach_perc = c(42.6229508196721, 
13.5593220338983, 664.179104477612, 201.960784313725, 144.927536231884, 
9.52380952380952), snp_perc = c(152.238805970149, 0, 331.818181818182, 
188.372093023256, 233.333333333333, 0), average_imt = c(0.6145, 
0.71, 0.6745, 0.4635, 0.60275, 0.6415)), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))
Mark Davies
  • 787
  • 5
  • 18

0 Answers0