3

I have a data set of item difficulties that correspond to items on a questionnaire that looks like this:

##         item  difficulty
## 1  ITEM_01_A  2.31179818
## 2  ITEM_02_B  1.95215238
## 3  ITEM_03_C  1.93479536
## 4  ITEM_04_D  1.62610855
## 5  ITEM_05_E  1.62188759
## 6  ITEM_06_F  1.45137544
## 7  ITEM_07_G  0.94255210
## 8  ITEM_08_H  0.89941812
## 9  ITEM_09_I  0.72752197
## 10 ITEM_10_J  0.61792597
## 11 ITEM_11_K  0.61288399
## 12 ITEM_12_L  0.39947791
## 13 ITEM_13_M  0.32209970
## 14 ITEM_14_N  0.31707701
## 15 ITEM_15_O  0.20902108
## 16 ITEM_16_P  0.19923607
## 17 ITEM_17_Q  0.06023317
## 18 ITEM_18_R -0.31155481
## 19 ITEM_19_S -0.67777282
## 20 ITEM_20_T -1.15013758

I want to make an item map of these items that looks similar (not exactly) to this (I created this in Word, but it lacks true scaling as I just eyeballed the scale). It's not really a traditional statistical graphic, so I don't really know how to approach this. I don't care what graphics system this is done in, but I am more familiar with ggplot2 and base graphics.

I would greatly appreciate a method of plotting this sort of unusual plot.

Here's the data set (I'm including it as I was having difficulty using read.table on the dataframe above):

# Item difficulties for the 20 questionnaire items, one row per item,
# listed in decreasing order of difficulty.
# Item names follow the pattern ITEM_<two-digit index>_<letter>.
DF <- data.frame(
  item = sprintf("ITEM_%02d_%s", 1:20, LETTERS[1:20]),
  difficulty = c(
    2.31179818110545, 1.95215237740899, 1.93479536058926,
    1.62610855327073, 1.62188759115818, 1.45137543733965,
    0.942552101641177, 0.899418119889782, 0.7275219669431,
    0.617925967008653, 0.612883990709181, 0.399477905189577,
    0.322099696946661, 0.31707700560997, 0.209021078266059,
    0.199236065264793, 0.0602331732900628, -0.311554806052955,
    -0.677772822413495, -1.15013757942119
  ),
  stringsAsFactors = FALSE
)

Thank you in advance.

Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519

3 Answers

5

Here is a quick example:

# Requires ggplot2 to be attached (library(ggplot2)).
# Draw every item name in a single column at its true difficulty, and put
# one y-axis tick (labelled with two decimals) at each item's difficulty.
ggplot(DF, aes(x = 1, y = difficulty, label = item)) +
  geom_text(size = 3) +
  scale_y_continuous(
    breaks = DF$difficulty,
    minor_breaks = NULL,
    labels = sprintf("%.02f", DF$difficulty)
  ) +
  # The x position is a dummy constant, so suppress its ticks and grid lines.
  scale_x_continuous(breaks = NULL) +
  # theme()/element_blank() replace opts()/theme_blank(), which are no longer
  # available in current ggplot2 releases.
  theme(panel.grid.major = element_blank())

However, items whose difficulties are very close together will overlap. You can enforce a minimum spacing between labels like this:

# Smallest vertical gap allowed between two neighbouring labels.
m <- 0.1
# Gaps between consecutive difficulties, walking from the last row of DF
# towards the first, with every gap widened to at least m.
nd <- pmax(diff(rev(DF$difficulty)), m)
# Turn the gaps into cumulative offsets measured from the last row.
nd <- c(0, cumsum(nd))
# Anchor the offsets at the last row's difficulty and flip back to DF's
# row order, giving the adjusted plotting position of every item.
DF$nd <- rev(DF$difficulty[nrow(DF)] + nd)

# Requires ggplot2 to be attached (library(ggplot2)).
# Same item map as before, but labels are drawn at the adjusted positions
# in DF$nd, while the axis ticks still show the true difficulties.
ggplot(DF, aes(x = 1, y = nd, label = item)) +
  geom_text(size = 3) +
  # Ticks sit at the adjusted positions; their labels are the real values.
  # (The stray positional DF$difficulty argument was dropped: it was being
  # consumed as an unnamed scale argument rather than as breaks or labels.)
  scale_y_continuous(
    breaks = DF$nd,
    labels = sprintf("%.02f", DF$difficulty),
    minor_breaks = NULL
  ) +
  # The x position is a dummy constant, so suppress its ticks and grid lines.
  scale_x_continuous(breaks = NULL) +
  # theme()/element_blank() replace opts()/theme_blank(), which are no longer
  # available in current ggplot2 releases.
  theme(panel.grid.major = element_blank())
Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519
kohske
  • 65,572
  • 8
  • 165
  • 155
4

Here is a solution with base graphics.

# Compute the position of the labels to limit overlaps:
# move them as little as possible, but keep them 
# at least .1 units apart.
library(quadprog)
#' Spread sorted values so that neighbours are at least `eps` apart
#'
#' Finds new positions that keep the original order, are separated by at
#' least `eps`, and stay as close as possible (in the least-squares sense)
#' to the original values. Solved as a quadratic program with
#' quadprog::solve.QP(), so the quadprog package must be attached.
#'
#' @param b Numeric vector sorted in ascending order, without missing values.
#' @param eps Minimum distance required between consecutive positions.
#' @return Numeric vector of adjusted positions, the same length as `b`.
spread <- function(b, eps = .1) {
  stopifnot(is.numeric(b), !anyNA(b), !is.unsorted(b))
  n <- length(b)
  # With fewer than two values there is nothing to separate. Returning early
  # also avoids 1:(n - 1), which counts backwards when n < 2.
  if (n < 2) {
    return(as.numeric(b))
  }
  # Objective: minimise 1/2 * x'x - b'x, i.e. the squared distance to b.
  Dmat <- diag(n)
  dvec <- b
  # One constraint per neighbouring pair: x[i + 1] - x[i] >= eps.
  # Column i of Amat carries -1 in row i and +1 in row i + 1.
  Amat <- matrix(0, nrow = n, ncol = n - 1)
  pair <- seq_len(n - 1)
  Amat[cbind(pair, pair)] <- -1
  Amat[cbind(pair + 1, pair)] <- 1
  bvec <- rep(eps, n - 1)
  r <- solve.QP(Dmat, dvec, Amat, bvec)
  r$solution
}
# spread() insists on ascending input, so order the rows by difficulty
# first, then store the de-overlapped label positions (minimum gap 0.1).
DF <- DF[order(DF[["difficulty"]]), ]
DF[["position"]] <- spread(DF[["difficulty"]], eps = 0.1)

# Empty canvas without axes; the vertical range is the range of the true
# difficulties padded by 10% of its width on both ends.
ylim <- range(DF$difficulty)
y_pad <- .1 * diff(ylim)
plot(
  NA,
  xlim = c(.5, 2),
  ylim = c(ylim[1] - y_pad, ylim[2] + y_pad),
  axes = FALSE, xlab = "", ylab = ""
)

# Lowest and highest label positions; they anchor the arrow and captions.
pos_low  <- min(DF$position)
pos_high <- max(DF$position)

# Difficulty values to the left of x = 1 (right-aligned), item names to the
# right of it (left-aligned), both at the adjusted positions.
text(.9,  DF$position, labels = round(DF$difficulty, 3), adj = c(1, 0))
text(1.1, DF$position, labels = DF$item,                 adj = c(0, 0))

# Double-headed vertical arrow with a caption below and above it.
arrows(1, pos_low, 1, pos_high, code = 3)
text(1, pos_low,  labels = "Easier",         adj = c(.5, 2))
text(1, pos_high, labels = "More difficult", adj = c(.5, -1))

# Column headers, pushed above the topmost label through the adj offsets.
text(.9,  pos_high, labels = "Difficulty", adj = c(1, -2))
text(1.1, pos_high, labels = "Item",       adj = c(0, -2))
Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519
Vincent Zoonekynd
  • 31,893
  • 5
  • 69
  • 78
  • I got it: `library(quadprog)` – Tyler Rinker Apr 02 '12 at 07:17
  • It solves quadratic optimization problems (which may be overkill for the problem at hand). More precisely, it tries to find the new positions for the labels, keeping them in the same order, but at least .1 units apart, and as close as possible to their initial positions. There is a detailed example [here](http://stackoverflow.com/questions/9817001/optimization-with-constraints/9817442#9817442). The code is actually the same code: I just replaced the `x[i+1] - x[i] >= 0` constraint with `x[i+1] - x[i] >= 0.1`. – Vincent Zoonekynd Apr 02 '12 at 07:21
  • Vincent It looks really terrific. Thank you. – Tyler Rinker Apr 02 '12 at 07:30
1

My own attempt but I think I'm going to like Vincent's solution much better as it looks like my original specification.

# Two-column item map in base graphics. Only the logical constants changed
# from the original: T/F are ordinary variables that can be reassigned,
# so TRUE/FALSE are used instead.

# Sort the rows by ascending difficulty.
DF <- DF[order(DF$difficulty), ]

par(mar=c(1, 1, 3, 0)+.4)
# Set up the coordinate system only (type = "n" draws no points).
# ylim is given as c(upper, lower), i.e. in decreasing order.
plot(rep(1:2, each=10), DF$difficulty, main = "Item Map         ",
    ylim = c(max(DF$difficulty)+1, min(DF$difficulty)-.2), 
    type = "n", xlab="", ylab="", axes=FALSE, xaxs="i")

# Item names in two columns (x = 1.55 and x = 1). c(TRUE, FALSE) and
# c(FALSE, TRUE) are recycled to pick every other element.
# NOTE(review): the y values come from one half of the rows (reversed) and
# the labels from the other half, so a label is not drawn at its own exact
# difficulty -- confirm this approximation is intended.
# NOTE(review): the x vectors have length 20 but only 10 y values/labels are
# supplied; this presumably relies on recycling -- confirm.
text(rep(1.55, 20), rev(DF$difficulty[c(TRUE, FALSE)]), 
    DF$item[c(FALSE, TRUE)], cex=.5, pos = 4)
text(rep(1, 20), rev(DF$difficulty[c(FALSE, TRUE)]), 
    DF$item[c(TRUE, FALSE)], cex=.5, pos = 4)
# NOTE(review): margins are changed after the plot was set up; presumably
# this affects where the annotations below land -- confirm before editing.
par(mar=c(0, 0, 0,0))
# Double-headed vertical arrow at x = 1.45 (hard-coded end points).
arrows(1.45, 2.45, 1.45, -1.29, .1, code=3)
# Rounded difficulty values, again split over two columns.
text(rep(1.52, 20), DF$difficulty[c(TRUE, FALSE)], 
    rev(round(DF$difficulty, 2))[c(TRUE, FALSE)], cex=.5, pos = 2)
text(rep(1.44, 20), DF$difficulty[c(FALSE, TRUE)], 
    rev(round(DF$difficulty, 2))[c(FALSE, TRUE)], cex=.5, pos = 2)
# Rotated axis title and captions at the two ends of the arrow.
text(1.455, .5,  "DIFFICULTY", cex=1, srt = -90)
text(1.45, -1.375,  "More Difficult", cex=.6)
text(1.45, 2.5,  "Easier", cex=.6)
# NOTE(review): the zero margins stay in effect for later plots because the
# previous par() settings are never restored.
par(mar=c(0, 0, 0,0))
Tyler Rinker
  • 108,132
  • 65
  • 322
  • 519