I have created a data frame containing the mean and SD, or the median and quartiles, for a number of variables.
I want to combine the `mean` and `sd` columns for each variable into a single column for that variable, and likewise combine the `median` and quartile (`low`, `high`) columns for the variables that are summarised that way.
This is the data:
# Summary table as emitted by dput(): 5 rows, with `stage` and `N` as ordinary
# columns. `mean`, `sd`, `median`, `low` and `high` are each a nested tibble
# (class "tbl_df") holding one sub-column per summarised variable; the `low`
# and `high` values carry the "25%" / "75%" names.
dvbaseline = structure(list(stage = c("Control", "CKD1-2", "CKD3a", "CKD3b",
"CKD4-5"), N = c(12L, 18L, 11L, 20L, 22L), mean = structure(list(
fmd_perc = c(9, 6.8, 11.8, 5.2, 7.5), gtn_perc = c(23.7,
16.5, 13.9, 15.2, 14.1), average_imt = c(0.6, 0.5, 0.7, 0.7,
0.7)), row.names = c(NA, -5L), class = c("tbl_df", "tbl",
"data.frame")), sd = structure(list(fmd_perc = c(6.6, 9.6, 11.6,
7, 6.4), gtn_perc = c(13.1, 5.8, 5.9, 9.6, 7.8), average_imt = c(0.1,
0.1, 0.2, 0.2, 0.2)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame")), median = structure(list(ach_perc = c(124.4,
182.1, 75.6, 138.1, 121.2), snp_perc = c(99.7, 160.4, 23.3, 64.1,
120)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
)), low = structure(list(ach_perc = c(`25%` = 59.7, `25%` = 64.2,
`25%` = 34, `25%` = 19.5, `25%` = 67.7), snp_perc = c(`25%` = 38.4,
`25%` = 106.4, `25%` = 19.1, `25%` = 35.5, `25%` = 78.7)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame")), high = structure(list(
ach_perc = c(`75%` = 372.1, `75%` = 309.8, `75%` = 213.6,
`75%` = 308, `75%` = 221.7), snp_perc = c(`75%` = 236.3,
`75%` = 240.8, `75%` = 74.5, `75%` = 223.8, `75%` = 215.2
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-5L))
This was created as follows:
# Per-stage summary of `pav_dv`.
# Each `name = across(...)` inside summarise() yields a tibble, so `mean`,
# `sd`, `median`, `low` and `high` become data-frame columns that each hold
# one sub-column per variable (e.g. `mean$fmd_perc`).
# `TRUE` is spelled out rather than `T`, because `T` is an ordinary variable
# that can be reassigned.
dvbaseline <- pav_dv %>%
  group_by(stage) %>%
  summarise(
    N = n(),
    # Mean and SD for these three variables, rounded to 1 dp.
    mean = across(
      c("fmd_perc", "gtn_perc", "average_imt"),
      ~ round(mean(.x, na.rm = TRUE), 1)
    ),
    sd = across(
      c("fmd_perc", "gtn_perc", "average_imt"),
      ~ round(sd(.x, na.rm = TRUE), 1)
    ),
    # Median and quartiles for these two variables, rounded to 1 dp.
    median = across(
      c("ach_perc", "snp_perc"),
      ~ round(median(.x, na.rm = TRUE), 1)
    ),
    low = across(
      c("ach_perc", "snp_perc"),
      ~ round(quantile(.x, na.rm = TRUE, probs = 0.25), 1)
    ),
    high = across(
      c("ach_perc", "snp_perc"),
      ~ round(quantile(.x, na.rm = TRUE, probs = 0.75), 1)
    )
  ) %>%
  # Order rows by the listed stage sequence rather than alphabetically.
  arrange(
    factor(
      stage,
      levels = c("Control", "CKD1-2", "CKD3a", "CKD3b", "CKD4-5")
    )
  )
Interestingly, `colnames(dvbaseline)` returns only
"stage" "N" "mean" "sd" "median" "low" "high"
even though, when I print or view the data frame, there are individual columns such as `mean$fmd_perc`.
I was going to use `paste0` to combine the mean with the sd, and the median with the quartiles.
The output should look like this:
stage N fmd_perc ach_perc
Control 12 9 (6.6) 124 (59.7,372)
etc.
A sample of the original data:
# Last rows of the raw data (tail()'s default count), restricted to the
# grouping column and the variables summarised in this post, then printed
# in a reproducible form.
original <- pav_dv %>%
  tail() %>%
  select(stage, fmd_perc, gtn_perc, ach_perc, snp_perc, average_imt)
dput(original)
# Output of dput(original): a 6-row tibble with `stage` plus the five
# numeric variables selected above.
structure(list(stage = c("CKD3a", "Control", "CKD3b", "Control",
"Control", "Control"), fmd_perc = c(17.0068027210884, 4.90196078431372,
13.2963988919668, 17.4515235457064, 6.28930817610062, 9.88372093023255
), gtn_perc = c(15.3318077803204, 45.2471482889734, 19.1135734072022,
39.8148148148148, 27.1844660194175, 25.9587020648967), ach_perc = c(42.6229508196721,
13.5593220338983, 664.179104477612, 201.960784313725, 144.927536231884,
9.52380952380952), snp_perc = c(152.238805970149, 0, 331.818181818182,
188.372093023256, 233.333333333333, 0), average_imt = c(0.6145,
0.71, 0.6745, 0.4635, 0.60275, 0.6415)), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))