How can I manually alter the LHS of two equation annotations in ggplot?

Question

The ggmisc package includes the excellent stat_poly_eq function that allows you to annotate a plot with regression equations used in stat/geom_smooth.

In the plot and code below I've tried to use stat_poly_eq to manually specify the LHS of an equation with a custom annotation. I'm aware that an answer to this question goes into some detail about the customisation options available, however, in my attempts — see below — I've been unable to prevent the annotation from printing multiple times.

Does anyone have an idea how to spot the multiple printing? Thanks in advance.

library(ggplot2)
library(ggpmisc)
library(dplyr)
library(tidyr)

## RAW DATA

raw_data <- data.frame(X = c(-49:50))
raw_data$X <- raw_data$X * 0.1
raw_data$Y1 <- 2 * raw_data$X + rnorm(100, sd = 0.5)
raw_data$Y2 <- 3 * raw_data$X + rnorm(100, sd = 0.4)

## PREP DATA

plot_data <- raw_data %>%
  pivot_longer(
    cols = Y1:Y2,
    names_to = "IV",
    values_to = "Ratio") 

## PLOT DATA

plot_data %>%
  ggplot() +
  aes(
    x = X, 
    y = Ratio,
    linetype = IV,
    shape = IV) +
  stat_smooth(
    method = 'lm', 
    formula = y~x,
    size = .5,
    colour = "black",
    se = FALSE) +
  geom_point(
    size = 3
  ) +
  stat_poly_eq(
    formula =  y ~ x,
    eq.with.lhs = c(
      "italic(log(R[1]/R[2]))~`=`~", 
      "italic(log(T[1]/T[2]))~`=`~"),
    eq.x.rhs = "~italic(X)",
    aes(
      label = paste(..eq.label.., sep = "~~~")),
    parse = TRUE)

score 1 · Accepted Answer · answered Feb 10 '21 at 04:45

This can be achieved with a slightly modified version of the underlying StatPolyEq:

The code used is the same code as what's in the question, swopping stat_poly_eq for stat_poly_eq2 as defined below:

stat_poly_eq2 <- function (mapping = NULL, data = NULL, geom = "text_npc", 
    position = "identity", ..., formula = NULL, eq.with.lhs = TRUE, 
    eq.x.rhs = NULL, coef.digits = 3, rr.digits = 2, f.digits = 3, 
    p.digits = 3, label.x = "left", label.y = "top", 
    label.x.npc = NULL, label.y.npc = NULL, hstep = 0, vstep = NULL, 
    output.type = "expression", na.rm = FALSE, show.legend = FALSE, 
    inherit.aes = TRUE) 
{
    if (!is.null(label.x.npc)) {
        stopifnot(grepl("_npc", geom))
        label.x <- label.x.npc
    }
    if (!is.null(label.y.npc)) {
        stopifnot(grepl("_npc", geom))
        label.y <- label.y.npc
    }
    ggplot2::layer(data = data, mapping = mapping, stat = StatPolyEq2, 
        geom = geom, position = position, show.legend = show.legend, 
        inherit.aes = inherit.aes, params = list(formula = formula, 
            eq.with.lhs = eq.with.lhs, eq.x.rhs = eq.x.rhs, coef.digits = coef.digits, 
            rr.digits = rr.digits, f.digits = f.digits, p.digits = p.digits, 
            label.x = label.x, label.y = label.y, hstep = hstep, 
            vstep = ifelse(is.null(vstep), ifelse(grepl("label", 
                geom), 0.125, 0.075), vstep), npc.used = grepl("_npc", 
                geom), output.type = output.type, na.rm = na.rm, 
            ...))
}

StatPolyEq2 <- ggproto(
  "StatPolyEq2", StatPolyEq,
  compute_group = function (data, scales, formula, weight, eq.with.lhs, eq.x.rhs, 
                            coef.digits, rr.digits, f.digits, p.digits, label.x, label.y, 
                            hstep, vstep, npc.used, output.type, na.rm) 
  {
    force(data)
    if (length(unique(data$x)) < 2) {
      return(tibble::new_tibble())
    }
    output.type <- if (!length(output.type)) {
      "expression"
    }
    else {
      tolower(output.type)
    }
    stopifnot(output.type %in% c("expression", "text", "markdown", 
                                 "numeric", "latex", "tex", "tikz"))
    if (is.null(data$weight)) {
      data$weight <- 1
    }
    if (exists("grp.label", data)) {
      if (length(unique(data[["grp.label"]])) > 1L) {
        warning("Non-unique value in 'data$grp.label' for group.")
        grp.label <- ""
      }
      else {
        grp.label <- data[["grp.label"]][1]
      }
    }
    else {
      grp.label <- ""
    }
    if (is.null(eq.x.rhs)) {
      if (output.type == "expression") {
        eq.x.rhs <- "~italic(x)"
      }
      else if (output.type == "markdown") {
        eq.x.rhs <- "_x_"
      }
      else {
        eq.x.rhs <- " x"
      }
    }
    group.idx <- abs(data$group[1])
    if (length(label.x) >= group.idx) {
      label.x <- label.x[group.idx]
    }
    else if (length(label.x) > 0) {
      label.x <- label.x[1]
    }
    if (length(label.y) >= group.idx) {
      label.y <- label.y[group.idx]
    }
    else if (length(label.y) > 0) {
      label.y <- label.y[1]
    }
    if (length(eq.with.lhs) >= group.idx) {
      eq.with.lhs <- eq.with.lhs[group.idx]
    }
    lm.args <- list(quote(formula), data = quote(data), weights = quote(weight))
    mf <- do.call(stats::lm, lm.args)
    mf.summary <- summary(mf)
    rr <- mf.summary$r.squared
    adj.rr <- mf.summary$adj.r.squared
    AIC <- AIC(mf)
    BIC <- BIC(mf)
    if ("fstatistic" %in% names(mf.summary)) {
      f.value <- mf.summary$fstatistic["value"]
      f.df1 <- mf.summary$fstatistic["numdf"]
      f.df2 <- mf.summary$fstatistic["dendf"]
      p.value <- 1 - stats::pf(q = f.value, f.df1, f.df2)
    }
    else {
      f.value <- f.df1 <- f.df2 <- p.value <- NA_real_
    }
    if (output.type == "numeric") {
      z <- tibble::tibble(coef.ls = list(summary(mf)[["coefficients"]]), 
                          coefs = list(stats::coefficients(mf)), r.squared = rr, 
                          adj.r.squared = adj.rr, f.value = f.value, f.df1 = f.df1, 
                          f.df2 = f.df2, p.value = p.value, AIC = AIC, BIC = BIC, 
                          rr.label = "")
    }
    else {
      coefs <- stats::coef(mf)
      formula.rhs.chr <- as.character(formula)[3]
      if (grepl("-1", formula.rhs.chr) || grepl("- 1", formula.rhs.chr)) {
        coefs <- c(0, coefs)
      }
      stopifnot(coef.digits > 0)
      if (coef.digits < 3) {
        warning("'coef.digits < 3' Likely information loss!")
      }
      eq.char <- as.character(signif(polynom::as.polynomial(coefs), 
                                     coef.digits))
      eq.char <- gsub("+ x", paste("+ 1.", 
                                   stringr::str_dup("0", coef.digits - 1L), 
                                   "*x", sep = ""),
                      eq.char, fixed = TRUE)
      eq.char <- gsub("e([+-]?[0-9]*)", "%*%10^{\\1}", eq.char)
      if (output.type %in% c("latex", "tex", "tikz", "markdown")) {
        eq.char <- gsub("*", " ", eq.char, fixed = TRUE)
      }
      if (is.character(eq.with.lhs)) {
        lhs <- eq.with.lhs
        eq.with.lhs <- TRUE
      }
      else if (eq.with.lhs) {
        if (output.type == "expression") {
          lhs <- "italic(y)~`=`~"
        }
        else if (output.type %in% c("latex", "tex", "tikz", "text")) {
          lhs <- "y = "
        }
        else if (output.type == "markdown") {
          lhs <- "_y_ = "
        }
      }
      if (eq.with.lhs) {
        eq.char <- paste(lhs, eq.char, sep = "")
      }
      stopifnot(rr.digits > 0)
      if (rr.digits < 2) {
        warning("'rr.digits < 2' Likely information loss!")
      }
      stopifnot(p.digits > 0)
      if (f.digits < 2) {
        warning("'f.digits < 2' Likely information loss!")
      }
      stopifnot(p.digits > 0)
      if (p.digits < 2) {
        warning("'p.digits < 2' Likely information loss!")
      }
      rr.char <- format(rr, digits = rr.digits)
      adj.rr.char <- format(adj.rr, digits = rr.digits)
      AIC.char <- sprintf("%.4g", AIC)
      BIC.char <- sprintf("%.4g", BIC)
      f.value.char <- as.character(signif(f.value, digits = f.digits))
      f.df1.char <- as.character(f.df1)
      f.df2.char <- as.character(f.df2)
      p.value.char <- as.character(signif(p.value, digits = p.digits))
      if (output.type == "expression") {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE), 
                            rr.label = paste("italic(R)^2", rr.char, sep = "~`=`~"), 
                            adj.rr.label = paste("italic(R)[adj]^2", 
                                                 adj.rr.char, sep = "~`=`~"),
                            AIC.label = paste("AIC", AIC.char, sep = "~`=`~"), 
                            BIC.label = paste("BIC", BIC.char, sep = "~`=`~"),
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("italic(F)[", f.df1.char, 
                                                         "*\",\"*", f.df2.char, "]~`=`~", f.value.char, 
                                                         sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("italic(P)", ifelse(p.value < 0.001, "0.001", p.value.char), 
                                                         sep = ifelse(p.value <  0.001, "~`<=`~", "~`=`~"))), 
                            grp.label = grp.label)
      }
      else if (output.type %in% c("latex", "tex", "text", "tikz")) {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, 
                                            eq.char, fixed = TRUE),
                            rr.label = paste("R^2", rr.char, sep = " = "), 
                            adj.rr.label = paste("R_{adj}^2", adj.rr.char, sep = " = "), 
                            AIC.label = paste("AIC", AIC.char, sep = " = "), 
                            BIC.label = paste("BIC", BIC.char, sep = " = "),
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("F_{", f.df1.char, ",", f.df2.char, "} = ", f.value.char, sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("P", p.value.char, 
                                                         sep = ifelse(p.value < 0.001, " \\leq ", " = "))),
                            grp.label = grp.label)
      }
      else if (output.type == "markdown") {
        z <- tibble::tibble(eq.label = gsub("x", eq.x.rhs, eq.char, fixed = TRUE),
                            rr.label = paste("_R_<sup>2</sup>", rr.char, sep = " = "), 
                            adj.rr.label = paste("_R_<sup>2</sup><sub>adj</sub>", 
                                                 adj.rr.char, sep = " = "),
                            AIC.label = paste("AIC", AIC.char, sep = " = "), 
                            BIC.label = paste("BIC", BIC.char, sep = " = "), 
                            f.value.label = ifelse(is.na(f.value), character(0L), 
                                                   paste("_F_<sub>", f.df1.char, ",", f.df2.char, "</sub> = ", f.value.char, 
                                                         sep = "")), 
                            p.value.label = ifelse(is.na(p.value), character(0L), 
                                                   paste("_P_", p.value.char, 
                                                         sep = ifelse(p.value < 0.001, " &le; ", " = "))), 
                            grp.label = grp.label)
      }
      else {
        warning("Unknown 'output.type' argument: ", output.type)
      }
    }
    if (npc.used) {
      margin.npc <- 0.05
    }
    else {
      margin.npc <- 0
    }
    if (is.character(label.x)) {
      label.x <- compute_npcx(x = label.x, group = group.idx, 
                              h.step = hstep, margin.npc = margin.npc)
      if (!npc.used) {
        x.expanse <- abs(diff(range(data$x)))
        x.min <- min(data$x)
        label.x <- label.x * x.expanse + x.min
      }
    }
    if (is.character(label.y)) {
      label.y <- compute_npcy(y = label.y, group = group.idx, 
                              v.step = vstep, margin.npc = margin.npc)
      if (!npc.used) {
        y.expanse <- abs(diff(range(data$y)))
        y.min <- min(data$y)
        label.y <- label.y * y.expanse + y.min
      }
    }
    if (npc.used) {
      z$npcx <- label.x
      z$x <- NA_real_
      z$npcy <- label.y
      z$y <- NA_real_
    }
    else {
      z$x <- label.x
      z$npcx <- NA_real_
      z$y <- label.y
      z$npcy <- NA_real_
    }
    z
  })

Modifications from the original:

stat_poly_eq2: stat = StatPolyEq2 instead of stat = StatPolyEq;
StatPolyEq2: added if (length(eq.with.lhs) >= group.idx) {eq.with.lhs <- eq.with.lhs[group.idx]} clause in lines 58-60, so that if multiple equation expressions are given, only the one whose order corresponds to the specific group is used.

Hi @Z.Lin thanks for this incredible answer. I'm didn't realise the solution would require tinkering with the bowels of the plotting function. Solution works well. Maybe it's worth suggesting as an update to the package? — vengefulsealion, Feb 10 '21 at 22:30

How can I manually alter the LHS of two equation annotations in ggplot?

1 Answers1