3

I have the following dataframe:

              ML Algorithm Option  Coeff  Lower  Upper
1  Random Forest    Algo_1  Opt_1  0.021 -0.124  0.166
2          Lasso    Algo_1  Opt_1  0.130 -0.012  0.273
3        XGBoost    Algo_1  Opt_1 -0.052 -0.211  0.108
4     Neural Net    Algo_1  Opt_1  0.114 -0.009  0.238
5  Random Forest    Algo_1  Opt_2  0.116 -0.033  0.264
6          Lasso    Algo_1  Opt_2  0.158  0.019  0.297
7        XGBoost    Algo_1  Opt_2 -0.260 -0.508 -0.012
8     Neural Net    Algo_1  Opt_2  0.035 -0.100  0.170
9  Random Forest    Algo_2  Opt_1  0.028 -0.117  0.172
10         Lasso    Algo_2  Opt_1  0.134 -0.008  0.277
11       XGBoost    Algo_2  Opt_1 -0.054 -0.214  0.106
12    Neural Net    Algo_2  Opt_1  0.118 -0.006  0.241
13 Random Forest    Algo_2  Opt_2  0.038 -0.097  0.172
14         Lasso    Algo_2  Opt_2  0.133 -0.006  0.272
15       XGBoost    Algo_2  Opt_2 -0.055 -0.240  0.131
16    Neural Net    Algo_2  Opt_2  0.118 -0.007  0.242

The column ML contains four machine learning algorithms (Random Forest, Lasso, XGBoost, Neural Net). Each ML can be fit with two Algorithms (Algo_1, Algo_2), and each Algorithm can be fit via two Options (Opt_1, Opt_2), yielding four distinct coefficients for each ML.

I plot the data the following way:

# Coefficient estimates (points) with their Lower/Upper intervals (error bars),
# drawn in one panel per ML x Algorithm combination.
p <- ggplot(results2, aes(x = Option, y = Coeff, color = ML)) +
  geom_point(size = 5) +
  # `color = ML` is inherited from the plot-level mapping.
  geom_errorbar(aes(ymin = Lower, ymax = Upper), size = 1) +
  # Reference line at zero.
  geom_hline(yintercept = 0, color = "grey", size = 1.5) +
  facet_grid(. ~ ML + Algorithm, scales = "free_x") +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  labs(x = "", y = "") +
  theme(
    panel.border = element_blank(),
    panel.spacing.x = unit(0, "line") # Remove space between facets
  )
p

This is the result: enter image description here

Questions: 1.) How do I move the ML name (i.e. Lasso, Random Forest, XGBoost, Neural Net) to the right so that it is above and between the respective Algos, i.e. Algo_1 and Algo_2, so that each ML name only appears once and not twice above each Algo?

2.) How do I make the ML names bold?

Thank you very much for your help.

miamialan
  • 53
  • 5
  • 2
    You could try using nested facets from the ggh4x package to achieve this. See [here](https://teunbrand.github.io/ggh4x/articles/Facets.html#nested_facets) for a nice demonstration. – Allan Cameron Oct 07 '22 at 14:58

2 Answers

5

If you want the labels centered, you can do:

library(ggh4x)

# Strip text per nesting layer (by_layer_x = TRUE): the first element styles
# the outer ML strip (bold, 12 pt), the second keeps the default text for the
# inner Algorithm strip.
nested_strips <- strip_nested(
  text_x = list(
    element_text(face = "bold", size = 12),
    element_text()
  ),
  by_layer_x = TRUE
)

# Same layers as the original plot; only the faceting changes to nested facets.
# Note: ggplot2 >= 3.4.0 deprecates `size` for line widths in favour of
# `linewidth`.
ggplot(results2, aes(x = Option, y = Coeff, color = ML)) +
  geom_point(size = 5) +
  geom_errorbar(aes(ymin = Lower, ymax = Upper), size = 1) +
  geom_hline(yintercept = 0, color = "grey", size = 1.5) +
  facet_nested(. ~ ML + Algorithm, scales = "free_x", strip = nested_strips) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  labs(x = "", y = "") +
  theme(
    panel.border = element_blank(),
    panel.spacing.x = unit(0, "line")
  )

enter image description here

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
4

As already suggested by @AllanCameron in the comments, this is one of the use cases for which ggh4x is well suited:

library(ggplot2)
library(ggh4x)

# Shared layers (points, error bars, zero line) without any faceting, so that
# a faceting specification can be added on top afterwards.
# Note: ggplot2 >= 3.4.0 deprecates `size` for line widths in favour of
# `linewidth`.
base <- ggplot(results2, aes(x = Option, y = Coeff, color = ML)) +
  geom_point(size = 5) +
  # `color = ML` is inherited from the plot-level mapping.
  geom_errorbar(aes(ymin = Lower, ymax = Upper), size = 1) +
  geom_hline(yintercept = 0, color = "grey", size = 1.5) +
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  labs(x = "", y = "") +
  theme(
    panel.border = element_blank(),
    panel.spacing.x = unit(0, "line") # Remove space between facets
  )


# Nested column facets: ML is the outer layer, Algorithm the inner layer.
# With by_layer_x = TRUE the first text element styles the ML strip
# (right-aligned, bold) and the second the Algorithm strip (centred).
base +
  facet_nested(
    cols = vars(ML, Algorithm),
    scales = "free_x",
    strip = strip_nested(
      text_x = list(
        element_text(hjust = 1, face = "bold"),
        element_text(hjust = 0.5)
      ),
      by_layer_x = TRUE
    )
  )

enter image description here

DATA

# Example data used by the plotting code: 4 ML methods x 2 Algorithms x
# 2 Options, each with a coefficient and its Lower/Upper bounds.
# Bound to `results2`, the name the plotting snippets refer to.
# Built with structure() so the result matches the original dput() output
# exactly, including the character row names "1".."16".
results2 <- structure(
  list(
    ML = rep(c("Random Forest", "Lasso", "XGBoost", "Neural Net"), times = 4),
    Algorithm = rep(c("Algo_1", "Algo_2"), each = 8),
    Option = rep(rep(c("Opt_1", "Opt_2"), each = 4), times = 2),
    Coeff = c(
      0.021, 0.13, -0.052, 0.114, 0.116, 0.158, -0.26, 0.035,
      0.028, 0.134, -0.054, 0.118, 0.038, 0.133, -0.055, 0.118
    ),
    Lower = c(
      -0.124, -0.012, -0.211, -0.009, -0.033, 0.019, -0.508, -0.1,
      -0.117, -0.008, -0.214, -0.006, -0.097, -0.006, -0.24, -0.007
    ),
    Upper = c(
      0.166, 0.273, 0.108, 0.238, 0.264, 0.297, -0.012, 0.17,
      0.172, 0.277, 0.106, 0.241, 0.172, 0.272, 0.131, 0.242
    )
  ),
  class = "data.frame",
  row.names = as.character(seq_len(16L))
)
stefan
  • 90,330
  • 6
  • 25
  • 51
  • Thank you. Your solution works great. A final add-on question: How can I add the value of the coefficients above/below/close to the respective geom_point? – miamialan Oct 07 '22 at 17:08