2

So I am trying to tackle the following but I may have started down the wrong road.

As these sample sizes increase, I need to update the y-limits so the highest bar in geom_histogram() doesn't go off the top. The especially happens if the st. dev. is set near 0.

This is literally my second day working with Shiny and reactive applications so I feel I've gotten myself into a pickle.

I think I need to save the ggplot() objects and then update their ylimit reactively with the value of the largest bar from the last histogram. Just not sure if I can do that the way this thing is set up now.

(I am realizing I had a similar problem over 2 years ago)

ggplot2 Force y-axis to start at origin and float y-axis upper limit

This is different because it is the height of a histogram that needs to tell the y-axis to increase, not the largest data value. Also, because Shiny.

My server.R function looks like

    library(shiny)
library(ggplot2)
library(extrafont)


# Define server logic for random distribution application
function(input, output, session) {


    data <- reactive({
            set.seed(123)

             switch(input$dist, 
                    norm = rnorm(input$n, 
                                 sd = input$stDev),
                    unif = runif(input$n,-4,4),
                    lnorm = rlnorm(input$n)
                    )
                 })

    height="100%"

    plotType <- function(blah, maxVal, stDev, n, type) {

      roundUp <- function(x) 10^ceiling(log10(x)+0.001)
      maxX<- roundUp(maxVal)
      breakVal<-max(floor(maxX/10),1)

      switch(type,
             norm =  ggplot(as.data.frame(blah), aes(x=blah))+
               geom_histogram(binwidth = 0.2,
                              boundary = 0, 
                              colour = "black") +
               scale_y_continuous(limits = c(0, maxX),
                                  breaks = seq(0, maxX, breakVal), 
                                  expand = c(0, 0)) +
               scale_x_continuous(breaks = seq(-4, 4, 1),
                                  expand = c(0, 0)) +
               theme_set(theme_bw(base_size = 40) +
               ylab("Frequency")+
               xlab("")+
               coord_cartesian(xlim=c(-4, 4))+
               ggtitle(paste("n = ",n, "St Dev =", stDev,"  Normal Distribution ", sep = ' ')),

             unif =  ggplot(as.data.frame(blah), aes(x=blah))+
               geom_histogram(binwidth=0.1, boundary =0,colour = "black")+
               scale_y_continuous(limits = c(0,roundUp(maxVal*(3/stDev))),
                                  breaks=seq(0,roundUp(maxVal*(3/stDev)), roundUp(maxVal*(3/stDev))/10),
                                  expand = c(0, 0))+
               scale_x_continuous(breaks=seq(-4,4,1),expand = c(0, 0))+
               theme_set(theme_bw(base_size = 40))+
               ylab("Frequency")+xlab("")+
               coord_cartesian(xlim=c(-4,4))+
               ggtitle(paste("n = ",n, "     Uniform Distribution ", sep = ' ')),


             lnorm = ggplot(as.data.frame(blah), aes(x=blah))+
               geom_histogram(binwidth=0.2, boundary =0,colour = "black")+
               scale_y_continuous(limits = c(0,maxX),
                                  breaks=seq(0,maxX, breakVal),
                                  expand = c(0, 0))+
               scale_x_continuous(breaks=seq(0,8,1),expand = c(0, 0))+
               theme_set(theme_bw(base_size = 40))+
               ylab("Frequency")+xlab("")+
               coord_cartesian(xlim=c(0,8))+
                 ggtitle(paste("n = ",n, "     Log-Normal Distribution ", sep = ' '))
      )

    }

    observe({ 
      updateSliderInput(session, "n", 
                        step = input$stepSize,
                        max=input$maxN)
             })
    plot.dat <- reactiveValues(main=NULL, layer1=NULL)

     #plotType(data, maxVal, stDev, n, type)
    output$plot <- renderPlot({ 
                                plotType(data(),

                                switch(input$dist,
                                       norm = max((input$n)/7,1),
                                       unif = max((input$n)/50,1),
                                       lnorm =max((input$n)/8,1)
                                          ), 

                                input$stDev, 
                                input$n,
                                input$dist) })


  # Generate a summary of the data
  output$summary <- renderTable(
    as.array(round(summary(data())[c(1,4,6)],5)),
    colnames=FALSE
  )

  output$stDev <- renderTable(
    as.array(sd(data())),
    colnames=FALSE
  )

  # Generate an HTML table view of the data
  output$table <- renderTable({
    data.frame(x=data())
  })

}

And my ui.R looks like

  library(shiny)
library(shinythemes)
library(DT)


# Define UI for random distribution application 
shinyUI(fluidPage(theme = shinytheme("slate"),

  # Application title
  headerPanel("Michael's Shiny App"),

  # Sidebar with controls to select the random distribution type
  # and number of observations to generate. Note the use of the
  # br() element to introduce extra vertical spacing
  sidebarLayout(
    sidebarPanel(
      tags$head(tags$style("#plot{height:90vh !important;}")),
      radioButtons("dist", "Distribution:",
                   c("Standard Normal" = "norm",
                     "Uniform" = "unif",
                     "Log-normal" = "lnorm")),
      br(),

      numericInput("stepSize", "Step", 1, min = 1, max = NA, step = NA,
                   width = NULL),
      numericInput("maxN", "Max Sample Size", 50, min = NA, max = NA, step = NA,
                   width = NULL),

      br(),

        sliderInput("n", 
                  "Number of observations:", 
                  value = 0,
                  min = 1, 
                  max = 120000,
                  step = 5000,
                  animate=animationOptions(interval=1200, loop=T)),

      sliderInput("stDev", 
                  "Standard Deviation:", 
                  value = 1,
                  min = 0, 
                  max = 3,
                  step = 0.1,
                  animate=animationOptions(interval=1200, loop=T)),

      p("Summary Statistics"),         
      tabPanel("Summary", tableOutput("summary")),
      p("Sample St. Dev."),
      tabPanel("Standard Dev", tableOutput("stDev")),
      width =2
    ),

    # Show a tabset that includes a plot, summary, and table view
    # of the generated distribution
    mainPanel(
      tabsetPanel(type = "tabs", 
                  tabPanel("Plot", plotOutput("plot")), 
                  tabPanel("Table", tableOutput("table"))
      ))

  )))

The whole thing has a lot of redundancy. What I want to do, is once the biggest bar on the histogram gets close to the upper y-limit, I want the ylimit to jump to the next power of 10.

Any suggestions are greatly appreciated.

Update Loosely, the solution that I ended up using is as follows: In the renderPlot() function, you need to save the ggplot object. Then as mentioned below, access the ymax value (still within renderPlot()),

 ggplot_build(norm)$layout$panel_ranges[[1]]$y.range[[2]]

and then use that to update the y-axis. I used the following function to make the axis limit "nice".

roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) {
        10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]]
      }

Then updating the y-axis. (still within renderplot())

   ymaxX = roundUpNice(ggplot_build(norm)$layout$panel_ranges[[1]]$y.range[[2]])


  norm+scale_y_continuous(limits = c(0, max(ymaxX, 20)), 
                        expand=c(0,0))
Michael
  • 1,537
  • 6
  • 20
  • 42
  • Except for a small typo in `server.R` (a missing `+` after the last `ylab`) it seems to compile and do what you are asking to make it do. So I do not know what your question is. `maxX` is set using the function `roundUp` to increase the `ylimit` in `scale_y_continuous` by the factor of 10 you were looking for. Right? – Mike Wise Apr 23 '17 at 03:30
  • One thing I will say is that you should factor those `ggplot` plot building statements into seperate functions. Other than that it is not bad at all. – Mike Wise Apr 23 '17 at 03:33
  • It does for a bit because my constants in there do somewhat what I want. The two issues are the bars will go over the top sometimes, (depending on the value of the SD you choose), and also, it will up the y-max before the bars are anywhere near it. I would really like to be able to just grab the max_height of the bars the last plot made, and just use them to adjust the upper value of the y-axis. It would just be 10-times cleaner and work exactly right. – Michael Apr 23 '17 at 03:34
  • Can you provide some settings for which it shows that behavior, because it looked fine for me. – Mike Wise Apr 23 '17 at 03:36
  • Also adding an `na.rm=T` as a parameter to your `geom_histogram` call will eliminate all those irritating `NA` warnings. – Mike Wise Apr 23 '17 at 03:41
  • For example, for high values of "step" (like over 1000) and "number of observations" say over 10000, it will "refresh" too early. I'd like to be able to see the bars almost touch the top, but the ylim get extended before halfway usually. – Michael Apr 23 '17 at 03:44
  • The "switch()" statement that I have in the renderPlot() would be totally unnecessary if I could just use the heights of the bins to adjust the y limit. – Michael Apr 23 '17 at 03:46
  • 1
    ok, I see the problem, you need to determine how big the histogram bars are going to be before you can set the y scale limits appropriately. I don't know how to get at those values though. Please add that to your problem statement (and highlight it), I need to think about it and do some research, might take a couple days unless someone beats me to it. – Mike Wise Apr 23 '17 at 03:57
  • @MikeWise the histogram values can be accessed through `ggplot_build()` as in the below solution – C8H10N4O2 Aug 29 '17 at 02:59

2 Answers2

3

First, store the histogram (default axes).

p1 <- ggplot(...) + geom_histogram() 

Then, Use ggplot_build(p1) to access the heights of the histogram bars. For example,

 set.seed(1)
 df <- data.frame(x=rnorm(10000))
 library(ggplot2)
 p1 <- ggplot(df, aes(x=x)) + geom_histogram()
 bar_max <- max(ggplot_build(p1)[['data']][[1]]$ymax) # where 1 is index 1st layer
 bar_max # returns 1042

You will need a function to tell you what the next power of 10 is, for example:

nextPowerOfTen <- function(x) as.integer(floor(log10(x) + 1))
# example: nextPowerOfTen(999) # returns 3 (10^3=1000)

You will want to check whether the bar_max is within some margin (based on your preference) of the next power of 10. If an adjustment is triggered, you can simply do p1 + scale_y_continuous(limits=c(0,y_max_new)).

C8H10N4O2
  • 18,312
  • 8
  • 98
  • 134
  • 1
    I wasn't able to replicate your exact problem in Shiny -- the axes generally looked OK to me, but if you give a specific combination of inputs and what you think **ought** to happen, I can try to show a before/after. – C8H10N4O2 Aug 29 '17 at 02:56
  • My applet is online here https://sulock.shinyapps.io/NavbarExample/ (That is a different version that the one the above code provides). If you turn the SD on the "normal distribution" tab to about 1 and move the sample size up to 700 or so, you will see all the missing bars from when they extended above the height of the graph and were subsequently dropped. I see how that works without Shiny, but I don't know how to implement it with the context of Shiny. My plot is produced inside a renderPlot({}) function. I use reactive expressions to make the plot. – Michael Aug 29 '17 at 03:32
  • 1
    @Michael your server function is returning a plot. All I am saying is, store this plot to a temp variable (`p1 <- switch(...)` although not necessarily how I'd write it) and apply the steps described above. Then the output of the server function should be the modified plot (`p1 + scale_y_continuous(...)`). – C8H10N4O2 Aug 29 '17 at 03:37
  • Solved! No switch function necessary. You just save the plot and then update the scale_y_continuous() within the renderPlot() function. – Michael Sep 05 '17 at 01:18
1

I found the answer hidden in the "scale_y_continuous()" portion of your code. The app was very close, but in some cases, the data maxed out the y-axis, which made it appear like it was running further than the axis limits as you said.

To fix this problem, the expand argument within the scale_y_continuous section needs to be set to "c(0.05, 0)", instead of "c(0, 0)".

First, I've replicated an example of the graph run-off you were describing by setting the sample size to 50 and standard deviation to 0.3 within your app. After running the original code with "expand=c(0, 0)", we can see we get the following graph:

enter image description here

This problem is fixed by changing the argument to "expand=c(0.05, 0)", as shown here:

enter image description here

For copies of the fixed scripts, see below.

Part 1 -- server.R

library(shiny)
library(ggplot2)
library(extrafont)


# Define server logic for random distribution application
function(input, output, session) {

  data <- reactive({
    set.seed(123)

    switch(input$dist, 
           norm = rnorm(input$n, 
                        sd = input$stDev),
           unif = runif(input$n,-4,4),
           lnorm = rlnorm(input$n)
    )
  })

  height="100%"

  plotType <- function(blah, maxVal, stDev, n, type){

    roundUp <- function(x){10^ceiling(log10(x)+0.001)}
    maxX<- roundUp(maxVal)
    breakVal<-max(floor(maxX/10),1)

    switch(type,
           norm=ggplot(as.data.frame(blah), aes(x=blah)) +
             geom_histogram(binwidth = 0.2,
                            boundary = 0, 
                            colour = "black") +
             scale_y_continuous(limits = c(0, maxX),
                                breaks = seq(0, maxX, breakVal), 
                                expand = c(0.05, 0)) +
             scale_x_continuous(breaks = seq(-4, 4, 1),
                                expand = c(0, 0)) +
             theme_set(theme_bw(base_size = 40)) +
             ylab("Frequency") +
             xlab("") +
             coord_cartesian(xlim=c(-4, 4))+
             ggtitle(paste("n = ",n, "St Dev =", stDev,
                           "  Normal Distribution ", sep = ' ')),
           unif=ggplot(as.data.frame(blah), aes(x=blah)) +
             geom_histogram(binwidth=0.1, boundary=0, colour="black")+
             scale_y_continuous(
               limits = c(0,roundUp(maxVal*(3/stDev))),
               breaks=seq(0,roundUp(maxVal*(3/stDev)),
                                    roundUp(maxVal*(3/stDev))/10),      
               expand = c(0.05, 0))+
               scale_x_continuous(breaks=seq(-4,4,1),expand=c(0, 0)) +
                     theme_set(theme_bw(base_size = 40))+
                     ylab("Frequency")+xlab("")+
                     coord_cartesian(xlim=c(-4,4))+
                     ggtitle(paste("n = ",n,
                             "     Uniform Distribution ", sep = ' ')),
           lnorm=ggplot(as.data.frame(blah), aes(x=blah))+
              geom_histogram(binwidth=0.2,boundary=0, colour="black") +
                     scale_y_continuous(limits=c(o,maxX),
                                        breaks=seq(0,maxX, breakVal),
                                        expand = c(0.05, 0)) +
                     scale_x_continuous(breaks=seq(0,8,1),
                                        expand = c(0, 0)) +
                     theme_set(theme_bw(base_size = 40)) +
                     ylab("Frequency") +
                     xlab("") +
                     coord_cartesian(xlim=c(0,8)) +
                     ggtitle(paste("n = ",n,
                                   "     Log-Normal Distribution ",
                                   sep = ' '))
    )

}

observe({ 
  updateSliderInput(session, "n", 
                    step = input$stepSize,
                    max=input$maxN)
})
plot.dat <- reactiveValues(main=NULL, layer1=NULL)

#plotType(data, maxVal, stDev, n, type)
output$plot <- renderPlot({ 
  plotType(data(),

           switch(input$dist,
                  norm = max((input$n)/7,1),
                  unif = max((input$n)/50,1),
                  lnorm =max((input$n)/8,1)
           ), 

           input$stDev, 
           input$n,
           input$dist) })


# Generate a summary of the data
output$summary <- renderTable(
  as.array(round(summary(data())[c(1,4,6)],5)),
  colnames=FALSE
)

output$stDev <- renderTable(
  as.array(sd(data())),
  colnames=FALSE
)

# Generate an HTML table view of the data
output$table <- renderTable({
  data.frame(x=data())
})

}

Part 2 -- ui.R

library(shiny)
library(shinythemes)
library(DT)

# Define UI for random distribution application 
shinyUI(fluidPage(theme = shinytheme("slate"),

        # Application title
        headerPanel("Michael's Shiny App"),

        # Sidebar with controls to select the random distribution type
        # and number of observations to generate. Note the use of the
        # br() element to introduce extra vertical spacing
        sidebarLayout(
          sidebarPanel(
            tags$head(tags$style("#plot{height:90vh !important;}")),
                      radioButtons("dist", "Distribution:",
                                   c("Standard Normal" = "norm",
                                   "Uniform" = "unif",
                                   "Log-normal" = "lnorm")),
          br(),
          numericInput("stepSize", "Step", 1, 
                       min = 1, max = NA, step = NA, width = NULL),
          numericInput("maxN", "Max Sample Size", 50, 
                       min = NA, max = NA, step = NA,width = NULL),
          br(),

          sliderInput("n", "Number of observations:", value = 0,
                      min = 1, max = 120000, step = 5000,
                      animate=animationOptions(interval=1200, loop=T)),
          sliderInput("stDev","Standard Deviation:",value = 1,
                      min = 0,max = 3,step = 0.1,
                      animate=animationOptions(interval=1200, loop=T)),

          p("Summary Statistics"),         
          tabPanel("Summary", tableOutput("summary")),
          p("Sample St. Dev."),
          tabPanel("Standard Dev", tableOutput("stDev")),
                   width =2),

          # Show a tabset that includes a plot, summary, and table view
          # of the generated distribution
          mainPanel(tabsetPanel(type = "tabs", 
                    tabPanel("Plot", plotOutput("plot")), 
                    tabPanel("Table", tableOutput("table"))
                   ))

)))

Update Loosely, the solution that I ended up using is as follows: In the renderPlot() function, you need to save the ggplot object. Then as mentioned below, access the ymax value (still within renderPlot()),

 ggplot_build(p1)$layout$panel_ranges[[1]]$y.range[[2]]

and then use that to update the y-axis. I used the following function to make the axis limit "nice".

roundUpNice <- function(x, nice=c(1,2,4,5,6,8,10)) {
        if(length(x) != 1) stop("'x' must be of length 1")
        10^floor(log10(x)) * nice[[which(x <= 10^floor(log10(x)) * nice)[[1]]]]
      }
Michael
  • 1,537
  • 6
  • 20
  • 42
www
  • 4,124
  • 1
  • 11
  • 22
  • Ryan, see that gap in your fixed version? There should be a bar there. It got removed because it extends above the top of the graph. – Michael Aug 30 '17 at 12:51
  • Hi, Michael. The best way to objectively test whether this answer is correct is to use '#' to comment out the scale_y_continuous section in each of the ggplot sections of your code. You can either try commenting out the whole section at once, or try it without one or more of the options you've included. By doing this, we can see that adjusting the expand option in the way I've demonstrated won't remove data; it's just adding standard margins so that the data doesn't appear to run off the graph or be removed. – www Aug 30 '17 at 17:53
  • Ryan, I don't need to do that. There shouldn't be a gap in the picture. Your solution does not work. When fixed, the picture will not have a blank space there. The issue is that bar extends above the additional bit you add with your code. I appreciate your input though. – Michael Aug 30 '17 at 18:23
  • If you provide raw data showing that the frequency goes above 10, along with a reproducible example of your own that demonstrates how or why my answer doesn't apply, then I'd be happy to take another look. – www Aug 30 '17 at 18:58
  • The data is produced randomly within in the "switch" function creating the object "data" at the top of the server.r file. – Michael Aug 30 '17 at 19:32
  • The data isn't completely random in this case because you're setting the seed with set.seed(123) before the sampling. So you could provide a much smaller scale question by using the same data. – www Sep 04 '17 at 20:51