3

I am using the ggplot_dual_axis() function from the Stack Overflow answer at How can I put a transformed scale on the right side of a ggplot2? in order to display two Y axes, one on the right and one on the left. The function is a big hacky mess that basically overlays two plots on top of one another, one with a Y axis on the left and one with a Y axis on the right. However, it doesn't seem to take all elements from the right plot, in particular not the legend. The reason this matters is that it displays the left plot first, causing the grid lines to be written over the legend. I don't understand the code in ggplot_dual_axis() enough to fix it. Can someone who understands it help me?

Here's the code I have:

library(ggplot2)
library(reshape2)
library(scales) # for format_format

# See https://stackoverflow.com/questions/18989001/how-can-i-put-a-transformed-scale-on-the-right-side-of-a-ggplot2
ggplot_dual_axis <- function(lhs, rhs, axis.title.y.rhs = "rotate") {
  # 1. Fix the right y-axis label justification
    rhs <- rhs + theme(axis.text.y = element_text(hjust = 0))
  # 2. Rotate the right y-axis label by 270 degrees by default
    if (missing(axis.title.y.rhs) | 
        axis.title.y.rhs %in% c("rotate", "rotated")) {
        rhs <- rhs + theme(axis.title.y = element_text(angle = 270)) 
    }
  # 3a. Use only major grid lines for the left axis
    lhs <- lhs + theme(panel.grid.minor = element_blank())
  # 3b. Use only major grid lines for the right axis
  #     force transparency of the backgrounds to allow grid lines to show
    rhs <- rhs + theme(panel.grid.minor = element_blank(), 
        panel.background = element_rect(fill = "transparent", colour = NA), 
        plot.background = element_rect(fill = "transparent", colour = NA))
# Process gtable objects
  # 4. Extract gtable
    library("gtable") # loads the grid package
    g1 <- ggplot_gtable(ggplot_build(lhs))
    g2 <- ggplot_gtable(ggplot_build(rhs))
  # 5. Overlap the panel of the rhs plot on that of the lhs plot
    pp <- c(subset(g1$layout, name == "panel", se = t:r))
    g <- gtable_add_grob(g1, 
        g2$grobs[[which(g2$layout$name == "panel")]], pp$t, pp$l, pp$b, pp$l)
  # Tweak axis position and labels
    ia <- which(g2$layout$name == "axis-l")
    ga <- g2$grobs[[ia]]
    ax <- ga$children[["axis"]]  # ga$children[[2]]
    ax$widths <- rev(ax$widths)
    ax$grobs <- rev(ax$grobs)
    ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")
    g <- gtable_add_cols(g, g2$widths[g2$layout[ia, ]$l], length(g$widths) - 1)
    g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)
    g <- gtable_add_grob(g, g2$grobs[[7]], pp$t, length(g$widths), pp$b)
  # Display plot with arrangeGrob wrapper arrangeGrob(g)
    library("gridExtra")
    grid.newpage()
    return(arrangeGrob(g))
}

####### Set up data
t = read.table("beadle-enwiki-norestrict-50-50.nb.uniform.dat", header=TRUE)
colnames(t) = c("acc", "mean", "median", "degrees")
t2 = read.table("beadle-enwiki-restrict-50-50.nb.uniform.dat", header=TRUE)
colnames(t2) = c("acc.restrict", "mean.restrict", "median.restrict", "degrees")
# Convert from wide to long format (opposite is 'cast');
# cols will be 'x', 'metric' and 'value'
data = melt(unique(t), id = "degrees", variable.name="metric", value.name="value", na.rm=T)
data2 = melt(unique(t2), id = "degrees", variable.name="metric", value.name="value", na.rm=T)
data = rbind(data, data2)
data[data$metric == "acc","value"] = 3000 - data[data$metric == "acc","value"] * 100 
data[data$metric == "acc.restrict","value"] = 3000 - data[data$metric == "acc.restrict","value"] * 100 
# Extract only those where metric is Acc or Median
data = subset(data, metric=="acc" | metric=="median" | metric=="acc.restrict" |
    metric=="median.restrict")
# Create a data frame to simulate a horizontal line for Naive Bayes
# instead of geom_hline(), which produces two legends in a messed-up way,
# with metric values 1 and 2 duplicated in the two. You can eliminate the
# duplication by taking out 'linetype=metric' in the call to 'ggplot' below.
#newdf = data.frame(degrees=c(-Inf, Inf), value=84.49, metric="Naive Bayes")

create_ggplot = function() {
  return(ggplot(data, aes(degrees, value, group=metric, color=metric, shape=metric, linetype=metric)) +
  scale_x_sqrt(breaks=c(0.1,0.25,0.5,1,2,3,4,5), labels=format_format(drop0trailing=TRUE)) +
  # Override line types; not totally necessary
  scale_linetype_manual(values = c(1,3,1,3)) +
  # Override shapes; important to have NA for third (Naive Bayes) shape
  scale_shape_manual(values = c(16,17,18,21)) +
  # Override colors; not totally necessary
  scale_color_manual(values = c("red", "blue","orange","black")) +
  # Set the title on the legend. All three have to agree or we get multiple legends.
  # We can also set these as the first parameters to scale_*_manual(), e.g.
  # scale_linetype_manual("metric", values = c(1,3,1,3))
  labs(color = "metric", shape = "metric", linetype = "metric")
  )
}

####### Plot data
p1 = (create_ggplot()
  + xlab(NULL)
  + ylab("Kilometers")
  + scale_y_continuous(trans="reverse", breaks = seq(from = 0, to = 3000, by = 500))
  + geom_line(linetype = "blank")
  + geom_point() # Draw points for same
  # Put the legend inside of the plot ...
  + theme(legend.position=c(0.85,0.82))
  # ... and make the background transparent.
  + theme(legend.background=element_blank())
  )

p2 = (create_ggplot()
  + xlab("K-d subdivision factor")
  + ylab("Acc@161 (pct)")
  + scale_y_continuous(trans = "reverse",
        labels = c("30%", "25%", "20%", "15%", "10%", "5%", "0%"),
        breaks = seq(from = 0, to = 3000, by = 500))
  + geom_point()
  + geom_smooth(se = FALSE, span=0.2)
  #+ theme(legend.position=c(0.8,0.4))
  )

p <- ggplot_dual_axis(lhs = p1, rhs = p2)
print(p)

Here's what get:

Resulting plot

Notice how the grid lines go through the legend; it's especially noticeable in the words.

Also, when I use pdf() and dev.off() so save the image as PDF, I get 3 pages, the first two of which are blank. Any idea how to fix that and get only one page?

Thanks!!

BTW, here's the file beadle-enwiki-restrict-50-50.nb.uniform.dat:

Acc@161  Mean   Median Param
 26.47 1196.18  876.86 0.10
 25.98 1248.06  876.86 0.15
 26.47 1220.19  895.41 0.25
 25.00 1160.03  828.01 0.35
 28.92 1070.64  718.03 0.50
 29.41 1017.81  714.61 0.60
 30.39 1045.87  658.71 0.70
 31.37  970.27  670.57 0.75
 31.86  970.59  615.73 0.80
 31.37 1034.13  693.35 0.85
 32.84 1006.79  580.53 0.90
 30.39  970.15  670.58 0.95
 28.43 1043.27  734.25 1.05
 30.39  948.51  556.36 1.10
 29.90  961.27  628.30 1.15
 33.33 1025.30  655.12 1.20
 33.33 1025.30  655.12 1.20
 33.82  905.29  531.95 1.25
 29.90 1015.78  625.00 1.30
 28.43  959.12  570.56 1.35
 29.90  951.32  600.57 1.40
 28.92  920.92  603.40 1.45
 28.43  973.23  627.40 1.50
 31.86  905.70  504.89 1.55
 31.86  923.96  629.65 1.60
 32.84  948.97  576.03 1.65
 30.88  895.25  540.52 1.70
 29.41  929.82  655.11 1.75
 28.43 1001.63  698.88 1.80
 25.98 1002.50  639.88 1.85
 29.90  916.08  618.93 1.90
 28.92  912.40  571.47 1.95
 29.41 1013.34  652.83 1
 27.45  890.13  552.36 2.50
 27.45  890.13  552.36 2.50
 27.45  916.58  603.20 2
 27.45  916.58  603.20 2
 23.53  964.79  687.81 3.50
 26.96  933.72  634.51 3
 26.96  933.72  634.51 3
 15.69  998.84  671.73 4.50
 15.69  998.84  671.73 4.50
 18.63 1002.80  759.07 4
 18.63 1002.80  759.07 4
 13.73  981.85  662.07 5

And here's the file beadle-enwiki-norestrict-50-50.nb.uniform.dat:

Acc@161  Mean   Median Param
 23.04 3922.81 1825.83 0.10
 22.06 3888.09 1806.71 0.15
 24.51 3490.37 1648.58 0.25
 22.55 4039.88 1758.75 0.35
 25.49 4125.88 1748.56 0.50
 25.49 4180.57 1757.72 0.60
 25.98 4320.85 1762.17 0.70
 27.94 3915.26 1110.75 0.75
 27.94 3895.97 1215.07 0.80
 25.00 4269.12 1765.45 0.85
 28.43 3877.07 1264.86 0.90
 26.47 4010.01 1261.95 0.95
 25.98 4338.20 1640.40 1.05
 25.98 3800.07 1115.98 1.10
 26.47 3924.18 1134.45 1.15
 25.98 3992.77 1400.51 1
 28.43 3966.25 1581.52 1.20
 29.90 3946.38 1169.55 1.25
 26.96 4036.76 1570.82 1.30
 25.00 4128.11 1597.96 1.35
 24.51 4293.44 1556.12 1.40
 23.04 4448.78 1725.62 1.45
 21.57 4401.99 1773.66 1.50
 26.96 3697.66 1066.88 1.55
 26.96 4033.89 1144.61 1.60
 27.45 3982.82 1081.80 1.65
 26.96 4050.45 1251.99 1.70
 25.49 3942.11 1117.52 1.75
 24.51 4265.03 1238.81 1.80
 23.53 3835.24 1250.52 1.85
 23.53 4123.50 1563.50 1.90
 24.02 4138.78 1258.69 1.95
 24.51 4321.01 1623.01 2
 24.02 4099.53 1216.75 2.50
 23.04 4294.64 1280.79 3
 20.59 4097.54 1262.57 3.50
 14.71 4612.40 1500.24 4
 11.76 5001.09 2029.41 4.50
 11.76 4913.45 1811.31 5
Community
  • 1
  • 1
Urban Vagabond
  • 7,282
  • 3
  • 28
  • 31
  • Is it possible to create a minimal reproducible example? Apart from that, seems that the legend is produced by `lhs`, so it may be possible to swap `lhs` and `rhs` in the code, so that `rhs` will be drawn first. – tonytonov Jan 28 '15 at 07:46
  • I can't just swap lhs and rhs that easily because the code in ggplot_dual_axis() is drawing rhs on top of lhs but doesn't draw the legend from rhs. Evidently the legend is stored in a different grob from the "panel", and it's only the "panel" grob from rhs that's getting added to the lhs graph. – Urban Vagabond Jan 28 '15 at 07:52
  • I added the data to make it reproducible. – Urban Vagabond Jan 28 '15 at 07:59

1 Answers1

5

You can add the guide-box of the lhs plot on on top of the plot using gtable_add_grob. This looks like that

dimGB1 <- c(subset(g1$layout, name == "guide-box", se = t:r))
g <- gtable_add_grob(g, 
                     g1$grobs[[which(g1$layout$name == "guide-box")]], 
                     dimGB1$t, dimGB1$l, dimGB1$b, dimGB1$l, z=-Inf)

Note that z = -Inf puts the new grob on top. The whole function then will look as follows:

##' function named ggplot_dual_axis()
##' Takes 2 ggplot plots and makes a dual y-axis plot
##' function takes 2 compulsory arguments and 1 optional argument
##' arg lhs is the ggplot whose y-axis is to be displayed on the left
##' arg rhs is the ggplot whose y-axis is to be displayed on the right
##' arg 'axis.title.y.rhs' takes value "rotate" to rotate right y-axis label
##' The function does as little as possible, namely:
##'  # display the lhs plot without minor grid lines and with a
##'  transparent background to allow grid lines to show
##'  # display the rhs plot without minor grid lines and with a
##'  secondary y axis, a rotated axis label, without minor grid lines
##'  # justify the y-axis label by setting 'hjust = 0' in 'axis.text.y'
##'  # rotate the right plot 'axis.title.y' by 270 degrees, for symmetry
##'  # rotation can be turned off with 'axis.title.y.rhs' option
##' Source: http://stackoverflow.com/questions/18989001/how-can-i-put-a-transformed-scale-on-the-right-side-of-a-ggplot2
##'  
ggplot_dual_axis <- function(lhs, rhs, axis.title.y.rhs = "rotate") {
  # 1. Fix the right y-axis label justification
  rhs <- rhs + theme(axis.text.y = element_text(hjust = 0))
  # 2. Rotate the right y-axis label by 270 degrees by default
  if (missing(axis.title.y.rhs) | 
        axis.title.y.rhs %in% c("rotate", "rotated")) {
    rhs <- rhs + theme(axis.title.y = element_text(angle = 270)) 
  }
  # 3a. Use only major grid lines for the left axis
  lhs <- lhs + theme(panel.grid.minor = element_blank())
  # 3b. Use only major grid lines for the right axis
  #     force transparency of the backgrounds to allow grid lines to show
  rhs <- rhs + theme(panel.grid.minor = element_blank(), 
                     panel.background = element_rect(fill = "transparent", colour = NA), 
                     plot.background = element_rect(fill = "transparent", colour = NA))
  # Process gtable objects
  # 4. Extract gtable
  library("gtable") # loads the grid package
  g1 <- ggplot_gtable(ggplot_build(lhs))
  g2 <- ggplot_gtable(ggplot_build(rhs))
  # 5. Overlap the panel of the rhs plot on that of the lhs plot
  pp <- c(subset(g1$layout, name == "panel", se = t:r))
  g <- gtable_add_grob(g1, 
                       g2$grobs[[which(g2$layout$name == "panel")]], pp$t, pp$l, pp$b, pp$l)
  # Tweak axis position and labels
  ia <- which(g2$layout$name == "axis-l")
  ga <- g2$grobs[[ia]]
  ax <- ga$children[["axis"]]  # ga$children[[2]]
  ax$widths <- rev(ax$widths)
  ax$grobs <- rev(ax$grobs)
  ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")
  g <- gtable_add_cols(g, g2$widths[g2$layout[ia, ]$l], length(g$widths) - 1)
  g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)
  g <- gtable_add_grob(g, g2$grobs[[7]], pp$t, length(g$widths), pp$b)
  # add legend on top
  if ("guide-box" %in% g1$layout$name){
    dimGB1 <- c(subset(g1$layout, name == "guide-box", se = t:r))
    g <- gtable_add_grob(g, 
                         g1$grobs[[which(g1$layout$name == "guide-box")]], 
                         dimGB1$t, dimGB1$l, dimGB1$b, dimGB1$l, z=-Inf)
  }
  # Display plot with arrangeGrob wrapper arrangeGrob(g)
  library("gridExtra")
  grid.newpage()
  return(arrangeGrob(g))
}
shadow
  • 21,823
  • 4
  • 63
  • 77
  • Thanks! That changed things so I now need to put the smoothing lines in the left graph (g1) but things now work fine when you do that. – Urban Vagabond Jan 29 '15 at 03:15