5

I am trying to group some rows/variables (both categorical and continuous) to help with the table readability in a large dataset.

Here is the dummy dataset:

library(gtsummary)
library(tidyverse)
library(gt)
set.seed(11012021)

# Create Dataset
PIR <- 
  tibble(
    siteidn = sample(c("1324", "1329", "1333", "1334"), 5000, replace = TRUE, prob = c(0.2, 0.45, 0.15, 0.2)) %>% factor(),
    countryname = sample(c("NZ", "Australia"), 5000, replace = TRUE, prob = c(0.3, 0.7)) %>% factor(),
    hospt = sample(c("Metropolitan", "Rural"), 5000, replace = TRUE, prob = c(0.65, 0.35)) %>% factor(),
    age = rnorm(5000, mean = 60, sd = 20),
    apache2 = rnorm(5000, mean = 18.5, sd=10),
    apache3 = rnorm(5000, mean = 55, sd=20),
    mechvent = sample(c("Yes", "No"), 5000, replace = TRUE, prob = c(0.4, 0.6)) %>% factor(),
    sex = sample(c("Female", "Male"), 5000, replace = TRUE) %>% factor(),
    patient = TRUE
  ) %>%
  mutate(patient_id = row_number())%>% 
  group_by(
    siteidn) %>% mutate(
      count_site = row_number() == 1L) %>%
  ungroup()%>% 
  group_by(
    patient_id) %>% mutate(
      count_pt = row_number() == 1L) %>%
  ungroup()

Then I use the following code to generate my table:

t1 <- PIR %>% 
  select(patientn = count_pt, siten = count_site, age, sex, apache2, apache3,  apache2, mechvent, countryname) %>% 
  tbl_summary(
    by = countryname,
    missing = "no", 
    statistic = list(
      patientn ~ "{n}",
      siten ~ "{n}",
      age ~ "{mean} ({sd})",
      apache2 ~ "{mean} ({sd})",
      mechvent ~ "{n} ({p}%)",
      sex ~ "{n} ({p}%)",
      apache3 ~ "{mean} ({sd})"),
    label = list(
      siten = "Number of ICUs",
      patientn = "Number of Patients",
      age = "Age",
      apache2 = "APACHE II Score",
      mechvent = "Mechanical Ventilation",
      sex = "Sex",
      apache3 = "APACHE III Score")) %>% 
  modify_header(stat_by = "**{level}**") %>%
  add_overall(col_label = "**Overall**")
  
t2 <- PIR %>% 
  select(patientn = count_pt, siten = count_site, age, sex, apache2, apache3,  apache2, mechvent, hospt) %>% 
  tbl_summary(
    by = hospt,
    missing = "no", 
     statistic = list(
      patientn ~ "{n}",
      siten ~ "{n}",
      age ~ "{mean} ({sd})",
      apache2 ~ "{mean} ({sd})",
      mechvent ~ "{n} ({p}%)",
      sex ~ "{n} ({p}%)",
      apache3 ~ "{mean} ({sd})"),
    label = list(
      siten = "Number of ICUs",
      patientn = "Number of Patients",
      age = "Age",
      apache2 = "APACHE II Score",
      mechvent = "Mechanical Ventilation",
      sex = "Sex",
      apache3 = "APACHE III Score")) %>%  
  modify_header(stat_by = "**{level}**")

tbl <-
  tbl_merge(
    tbls = list(t1, t2),
    tab_spanner = c("**Country**", "**Hospital Type**")
  ) %>%
  modify_spanning_header(stat_0_1 ~ NA) %>%
  modify_footnote(everything() ~ NA)

This produces the following table:

Table 1

I would like to group certain rows together for ease of reading. Ideally, I would like the table to look like this:

Table 1 Ideal

I have attempted using the gt package, with the following code:

tbl <-
  tbl_merge(
    tbls = list(t1, t2),
    tab_spanner = c("**Country**", "**Hospital Type**")
  ) %>%
  modify_spanning_header(stat_0_1 ~ NA) %>%
  modify_footnote(everything() ~ NA) %>% 
as_gt() %>%  
  gt::tab_row_group(
    group = "Severity of Illness Scores",
    rows = 7:8) %>%  
  gt::tab_row_group(
    group = "Patient Demographics",
    rows = 3:6) %>%  
  gt::tab_row_group(
    group = "Numbers",
    rows = 1:2)

This produces the desired table:

Table 1 Sections

There are a couple of issues I'm having with the way that I'm doing this.

  1. When I try to use the row names (variables), an error message comes up (Can't subset columns that don't exist...). Is there a way to do this by using the variable names? With larger tables, I am getting into some trouble with using the row numbers method of assigning row names. This is particularly true when there is a single variable that loses its place as it's moved to the end to account for the grouped rows.

  2. Is there a way to do this prior to piping into tbl_summary? Although I like the output of this table, I use Word as my output document for statistical reports and would like the ability to be able to format the tables in Word if need be (or by my collaborators). I usually use gtsummary::as_flextable for table output.

Thanks again,

Ben

Benjamin Moran
  • 109
  • 1
  • 8
  • thank you for including examples we can run on our machine and examples of what you'd like the output to look like. The next steps is to make your examples minimal. You can read more about minimal reproducible examples here (it's a short read): https://reprex.tidyverse.org/ – Daniel D. Sjoberg Jan 11 '21 at 14:34
  • Thanks. I'll look into that. – Benjamin Moran Jan 12 '21 at 09:03

2 Answers2

7
  1. When I try to use the row names (variables), an error message comes up (Can't subset columns that don't exist...). Is there a way to do this by using the variable names? With larger tables, I am getting into some trouble with using the row numbers method of assigning row names. This is particularly true when there is a single variable that loses its place as it's moved to the end to account for the grouped rows.

There are two ways to go about this, 1. build separate tables for each group, then stack them, and 2. add a grouping column to .$table_body then group the tibble by the new variable.

library(gtsummary)
library(dplyr)
packageVersion("gtsummary")
#> '1.3.6'

# Method 1 - Stack separate tables
t1 <- trial %>% select(age) %>% tbl_summary()
t2 <- trial %>% select(grade) %>% tbl_summary()

tbl1 <-
  tbl_stack(
    list(t1, t2), 
    group_header = c("Demographics", "Tumor Characteristics")
  ) %>%
  modify_footnote(all_stat_cols() ~ NA)

# Method 2 - build a grouping variable
tbl2 <-
  trial %>%
  select(age, grade) %>%
  tbl_summary() %>%
  modify_table_body(
    mutate,
    groupname_col = case_when(variable == "age" ~ "Deomgraphics",
                              variable == "grade" ~ "Tumor Characteristics")
  )

enter image description here

2.Is there a way to do this prior to piping into tbl_summary? Although I like the output of this table, I use Word as my output document for statistical reports and would like the ability to be able to format the tables in Word if need be (or by my collaborators). I usually use gtsummary::as_flextable for table output.

The examples above modify the table before exporting to gt format, so you can export these example to flextable. However, flextable does not have the same built-in header row functionality (or at least I am unaware of it, and don't use it in as_flex_table()), and the output would look like the table below. I recommend installing the dev version of gt from GitHub and export to RTF (supported by Word)--they've made many updates to RTF output in the last months, and it may work for you.

enter image description here

Daniel D. Sjoberg
  • 8,820
  • 2
  • 12
  • 28
  • 2
    Thanks again for your reply. I tried to use Method 2, which worked for a single table, but when I tried to merge the tables, the grouped rows were no more. If I used Method 1, I would have to construct 15 separate tables (3 merged for each level and 5 stacked levels). It's probably easier (and more time efficient) to use flextable and insert the rows in Word. The journal will likely change the format of the table. I will play around with what you've taught me and what I can find exporting through gt. Thanks again for all your help. It is greatly appreciated. Ben – Benjamin Moran Jan 12 '21 at 03:03
  • In that case, I would merge first, then add the grouping variables – Daniel D. Sjoberg Oct 16 '22 at 16:51
2

I think I might have a solution for this (thanks, obviously, to Daniel Sjoberg and team providing us with the modify_table_body function)

All you need to do is edit the underlying data frame to add a variable with your desired grouping row using modify_table_body, and then put it in the position you want it to be in, like this:

library(gtsummary)
library(dplyr)
packageVersion("gtsummary")

trial%>%
 select(age, stage, grade)%>%
 tbl_summary()%>%
 modify_table_body(
  ~.x %>% 
  
  # add your variable
  rbind(
    tibble(
      variable="Demographics",
      var_type=NA,
      var_label = "Demographics",
      row_type="label",
      label="Demographics",
      stat_0= NA))%>% # expand the components of the tibble as needed if you have more columns
  
  # can add another one
    rbind(
      tibble(
        variable="Tumor characteristics",
        var_type=NA,
        var_label = "Tumor characteristics",
        row_type="label",
        label="Tumor characteristics",
        stat_0= NA))%>%  
    
  # specify the position you want these in
    
  arrange(factor(variable, levels=c("Demographics",
                                    "age",
                                    "Tumor characteristics",
                                    "stage",
                                    "grade"))))%>%

# and you can then indent the actual variables
modify_column_indent(columns=label, rows=variable%in%c("age",
                                                      "stage",
                                                      "grade"))%>%

# and double indent their levels
modify_column_indent(columns=label, rows= (variable%in%c("stage",
                                                      "grade") 
                                             & row_type=="level"),
                     double_indent=T)

enter image description here

Guilherme
  • 63
  • 7