2

In subjective probability assessments one needs to elicit the distribution of subjects believes. It can be achieved by letting the subject manipulate the relative height of each frequency bin of a histogram. I.e. the distribution of probability, the envelope curve gets shaped maintaining the cumulative sum(P_i)=1. How can this be done with R? Is there already a package I can build on?


Alternatively: how can it be done in a spreadsheet application (excel, oo calc, google spreadsheet)?

Roland Kofler
  • 1,332
  • 1
  • 16
  • 33

1 Answers1

3

Here is some code that I put together using the tkrplot package and optionally the logspline package.

Just run the function (you can change the arguments, but to test you can try it with the defaults) then in the new window that comes up click in the plot, left clicks will add a point where you click, right (or middle) clicks will remove the point closest to where you click.

I will probably clean it up a bit and include it in a future release of the TeachingDemos package (so comments/suggestions are very welcome).

TkBuildDist <- function(  x=seq(min+(max-min)/nbin/2,
                                max-(max-min)/nbin/2,
                                length.out=nbin),
                          min=0, max=10, nbin=10, logspline=TRUE,
                          intervals=FALSE) {

    if(logspline) logspline <- require(logspline)
    require(tkrplot)

    xxx <- x

    brks <- seq(min, max, length.out=nbin+1)
    nx <- seq( min(brks), max(brks), length.out=250 )

    lx <- ux <- 0
    first <- TRUE

    replot <- if(logspline) {
        if(intervals) {
            function() {
                hist(xxx, breaks=brks, probability=TRUE,xlab='', main='')
                xx <- cut(xxx, brks, labels=FALSE)
                fit <- oldlogspline( interval = cbind(brks[xx], brks[xx+1]) )
                lines( nx, doldlogspline(nx,fit), lwd=3 )
                if(first) {
                    first <<- FALSE
                    lx <<- grconvertX(min, to='ndc')
                    ux <<- grconvertX(max, to='ndc')
                }
            }
        } else {
            function() {
                hist(xxx, breaks=brks, probability=TRUE,xlab='', main='')
                fit <- logspline( xxx )
                lines( nx, dlogspline(nx,fit), lwd=3 )
                if(first) {
                    first <<- FALSE
                    lx <<- grconvertX(min, to='ndc')
                    ux <<- grconvertX(max, to='ndc')
                }
            }
        }
    } else {
        function() {
            hist(xxx, breaks=brks, probability=TRUE,xlab='',main='')
            if(first) {
                first <<- FALSE
                lx <<- grconvertX(min, to='ndc')
                ux <<- grconvertX(max, to='ndc')
            }
        }
    }

    tt <- tktoplevel()
    tkwm.title(tt, "Distribution Builder")

    img <- tkrplot(tt, replot, vscale=1.5, hscale=1.5)
    tkpack(img, side='top')

    tkpack( tkbutton(tt, text='Quit', command=function() tkdestroy(tt)),
           side='right')

    iw <- as.numeric(tcl('image','width',tkcget(img,'-image')))

    mouse1.down <- function(x,y) {
        tx <- (as.numeric(x)-1)/iw
        ux <- (tx-lx)/(ux-lx)*(max-min)+min
        xxx <<- c(xxx,ux)
        tkrreplot(img)
    }

    mouse2.down <- function(x,y) {
        if(length(xxx)) {
            tx <- (as.numeric(x)-1)/iw
            ux <- (tx-lx)/(ux-lx)*(max-min)+min
            w <- which.min( abs(xxx-ux) )
            xxx <<- xxx[-w]
            tkrreplot(img)
        }
    }

    tkbind(img, '<ButtonPress-1>', mouse1.down)
    tkbind(img, '<ButtonPress-2>', mouse2.down)
    tkbind(img, '<ButtonPress-3>', mouse2.down)

    tkwait.window(tt)

    out <- list(x=xxx)
    if(logspline) {
        if( intervals ) {
            xx <- cut(xxx, brks, labels=FALSE)
            out$logspline <- oldlogspline( interval = cbind(brks[xx], brks[xx+1]) )
        } else {
            out$logspline <- logspline(xxx)
        }
    }

    if(intervals) {
        out$intervals <- table(cut(xxx, brks))
    }

    out$breaks <- brks

    return(out)
}

Here is another version that allows the dragging of the heights of the bars:

TkBuildDist2 <- function( min=0, max=1, nbin=10, logspline=TRUE) {
    if(logspline) logspline <- require(logspline)
    require(tkrplot)

    xxx <- rep( 1/nbin, nbin )

    brks <- seq(min, max, length.out=nbin+1)
    nx <- seq( min, max, length.out=250 )

    lx <- ux <- ly <- uy <- 0
    first <- TRUE

    replot <- if(logspline) {
        function() {
            barplot(xxx, width=diff(brks), xlim=c(min,max), space=0,
                    ylim=c(0,0.5), col=NA)
            axis(1,at=brks)
            xx <- rep( 1:nbin, round(xxx*100) )
            capture.output(fit <- oldlogspline( interval = cbind(brks[xx], brks[xx+1]) ))
            lines( nx, doldlogspline(nx,fit)*(max-min)/nbin, lwd=3 )

            if(first) {
                first <<- FALSE
                lx <<- grconvertX(min, to='ndc')
                ly <<- grconvertY(0,   to='ndc')
                ux <<- grconvertX(max, to='ndc')
                uy <<- grconvertY(0.5, to='ndc')
            }
        }
    } else {
        function() {
            barplot(xxx, width=diff(brks), xlim=range(brks), space=0,
                    ylim=c(0,0.5), col=NA)
            axis(at=brks)
            if(first) {
                first <<- FALSE
                lx <<- grconvertX(min, to='ndc')
                ly <<- grconvertY(0,   to='ndc')
                ux <<- grconvertX(max, to='ndc')
                uy <<- grconvertY(0.5, to='ndc')
            }
        }
    }

    tt <- tktoplevel()
    tkwm.title(tt, "Distribution Builder")

    img <- tkrplot(tt, replot, vscale=1.5, hscale=1.5)
    tkpack(img, side='top')

    tkpack( tkbutton(tt, text='Quit', command=function() tkdestroy(tt)),
           side='right')

    iw <- as.numeric(tcl('image','width',tkcget(img,'-image')))
    ih <- as.numeric(tcl('image','height',tkcget(img,'-image')))



    md <- FALSE

    mouse.move <- function(x,y) {
        if(md) {
            tx <- (as.numeric(x)-1)/iw
            ty <- 1-(as.numeric(y)-1)/ih

            w <- findInterval(tx, seq(lx,ux, length=nbin+1))

            if( w > 0 && w <= nbin && ty >= ly && ty <= uy ) {
                 xxx[w] <<- 0.5*(ty-ly)/(uy-ly)
                xxx[-w] <<- (1-xxx[w])*xxx[-w]/sum(xxx[-w])

                tkrreplot(img)
            }
        }
    }

    mouse.down <- function(x,y) {
        md <<- TRUE
        mouse.move(x,y)
    }

    mouse.up <- function(x,y) {
        md <<- FALSE
    }

    tkbind(img, '<Motion>', mouse.move)
    tkbind(img, '<ButtonPress-1>', mouse.down)
    tkbind(img, '<ButtonRelease-1>', mouse.up)

    tkwait.window(tt)

    out <- list(breaks=brks, probs=xxx)
    if(logspline) {
        xx <- rep( 1:nbin, round(xxx*100) )
        out$logspline <- oldlogspline( interval = cbind(brks[xx], brks[xx+1]) )
    }

    return(out)
}
Greg Snow
  • 48,497
  • 6
  • 83
  • 110
  • That is fantastic! very intuitive and smooth. Note, I think you mean 'nbin', not 'nbins' in the function arguments. – Ian Fellows Jan 27 '11 at 08:18
  • 1
    Roo, the reason I did the histogram with probability=TRUE and included the logspline was so that visually the relative frequencies are what are being seen. Yes we are adding by single units, but visually when you increase one part all the other parts are reduced so that the overall area remains 1. Can you give more detail on what you envision the interface to be like? – Greg Snow Jan 27 '11 at 15:12
  • It implements the functionality described. Didn't work on Ubuntu, on Windows Vista works. Tomorrow I try again on Ubuntu. The final touch would be if you can drag a bin so that you automaticaly remove from all other bins the same amount that you continually increase on the very bin. Only then you would be able to fine tune subjective probability expectations. thanks – Roland Kofler Jan 27 '11 at 22:50
  • 1
    OK, I have added another version that allows the dragging of the bars. – Greg Snow Jan 28 '11 at 22:07