0

I have the dataframe

test <- structure(list(
     y2002 = c("freshman","freshman","freshman","sophomore","sophomore","senior"),
     y2003 = c("freshman","junior","junior","sophomore","sophomore","senior"),
     y2004 = c("junior","sophomore","sophomore","senior","senior",NA),
     y2005 = c("senior","senior","senior",NA, NA, NA)), 
              .Names = c("2002","2003","2004","2005"),
              row.names = c(c(1:6)),
              class = "data.frame")
> test
       2002      2003      2004   2005
1  freshman  freshman    junior senior
2  freshman    junior sophomore senior
3  freshman    junior sophomore senior
4 sophomore sophomore    senior   <NA>
5 sophomore sophomore    senior   <NA>
6    senior    senior      <NA>   <NA>

and I want to create a graph that should resemble the ugly text art below:

freshman ---- junior ----------------------\

freshman ---- junior --- sophomore -------- senior

sophomore ================================/

senior ---------------------------------/

In other words, I need to show in a graph the possible paths to "senior", giving weights to edges according to the number of cases using that path.

First attempt This code generates a graph, but not one similar to the text art above.

library(igraph)
elist <- lapply(seq_len(nrow(test)), function(i) {
  x <- as.character(test[i,])
  x <- unique(na.omit(x))
  x <- rep(x, each=2)
  x <- x[-1]
  x <- x[-length(x)]
  r <- matrix(x, ncol=2, byrow=TRUE)
  if (nrow(r) > 0) { r <- cbind(r, i) } else { r <- cbind(r, numeric()) }
  r
})

result <- as.data.frame(do.call(rbind, elist))
names(result) <- c("vertex","edge", "id")
categories <- data.frame(name=c("freshman","junior","sophomore","senior"))
g <- graph.data.frame(result,directed=T,vertices=categories)
g <- set.edge.attribute(g, "weight", value=runif(ecount(g))*10)
igraph.par("plot.layout", layout.reingold.tilford)
plot(g, vertex.label=categories$name, vertex.label.dist=7, 
     edge.width=get.edge.attribute(g,"weight"), edge.arrow.size=1.5)

Result (not what I wanted)

resulting image

**This question is related to this post*

**And this post is a necessary step for solving this question*

Community
  • 1
  • 1
dmvianna
  • 15,088
  • 18
  • 77
  • 106
  • 1
    Your code works, except the labels aren't printed properly and you get an error. Is your question how to remove the error? – Andrie Sep 12 '12 at 09:58
  • I don't get an error, so maybe I didn't paste the code correctly. No, my problem is that the graph I get is not like the graph in my text art. – dmvianna Sep 12 '12 at 10:01
  • PS the error in you code is because the data frame you provide has `NA` values which get read into the graph as nodes. These nodes are not in your `categories` – user1317221_G Sep 12 '12 at 10:48

2 Answers2

3

This is a completely worked out solution. One has to read the graph bottom up, bearing in mind that each individual is represented as a vertical line that represents their career path. Yes, I did ditch iGraph for this task. \o/

enter image description here

require(reshape2)

meltpath <- function(x){
  require(data.table)
  x <- melt(data = x, id.vars = 'id', measure.vars = names(x)[-1])
  names(x) <- c('id','year','category')
  x$year <- factor(x$year)
  id <- unique(x$id)
  idtable <- data.table(id = id, count = 1:length(id))
  x <- x[order(x$id), ]
  x <- merge(x, idtable, by='id')
  return(x)
}

carpath <- function(datatable, max_x = max(datatable$count)){
  require(ggplot2)
  p = ggplot(datatable, aes(x = count, y = year, fill = category)) + 
    geom_tile() +
    scale_y_discrete(name = "year\n", 
                     breaks = rev(levels(datatable$year))) + 
    scale_x_continuous(name = "cumulative count", 
                      limits = c(0,max_x)) +
    guides(fill = guide_legend(title="Career stage\n",
                               reverse=TRUE)) +
    theme(panel.grid.major = element_blank(), 
         panel.background = element_blank(), 
         axis.ticks = element_blank(),
         plot.title = element_text(vjust = 1.2, face="bold", size=20),
         axis.title.y = element_text(size=15, face="bold"),
         axis.text.y = element_text(size=15, colour="black"),
         legend.title = element_text(size = 15),
         legend.text = element_text(size = 15)) +
         scale_fill_brewer(palette = "Dark2") +
    ggtitle("Career path of individual Students by year")
  p
}

test <- structure(list(
  id = 1:6,
  y2002 = c("freshman","freshman","freshman","sophomore","sophomore","senior"),
  y2003 = c("freshman","junior","sophomore","sophomore","sophomore","senior"),
  y2004 = c("junior","sophomore","sophomore","senior","senior",NA),
  y2005 = c("senior","senior","senior",NA, NA, NA)), 
                  .Names = c("id","2002","2003","2004","2005"),
                  row.names = c(c(1:6)),
                  class = "data.frame")
# Grow dataset
testg = data.frame()
for (i in rownames(test)) {
  test0 <- test[rep(i, each=abs(floor(rnorm(1)*100))),]
  testg <- rbind(testg, test0)
}
testg$id <- 1:nrow(testg)
# Munge
test0 <- testg
test1 <- melt(data = test0, id.vars = 'id', measure.vars = names(test0)[-1])
names(test1) <- c('id','year','category')
test1$category[test1$category == 'freshman'] <- 1
test1$category[test1$category == 'junior'] <- 2
test1$category[test1$category == 'sophomore'] <- 3
test1$category[test1$category == 'senior'] <- 4
test1$category <- factor(test1$category, levels=1:4, labels = c('1. freshman','2. junior','3. sophomore','4. senior'))
test1 <- test1[order(test1$category), ]
test1 <- dcast(test1, id ~ year)
test1 <- test1[order(test1$'2005',test1$'2004',test1$'2003',test1$'2002'), ]
test2 <- meltpath(test1)
carpath(test2)
dmvianna
  • 15,088
  • 18
  • 77
  • 106
2

I don't think you understand exactly what your up to, in your example sketch of the graph you have 8 nodes. But in the figure you produced you only have 4 nodes. This is because in the graph you create you only have 4 nodes. igraph will treat nodes with the same name (e.g. two sophomores as the same node but with two edges)

However, once you have made the graph with multiple sophomore etc nodes. e.g. sophomore1 and sophomore2. You can then place the nodes where you want them with layout as below

 df<- read.table(text="vertex edge weight
 freshman junior 2
 junior    senior 2
 freshman2 junior2 2
 junior2 sophomore 2
 sophomore senior 2
 sophomore2 senior 3
 senior2   senior 2",header=TRUE)
 categories <-data.frame(name=c("freshman","junior","sophomore","senior","freshman2",
 "junior2","sophomore2","senior2"))

g <- graph.data.frame(df,directed=T,vertices=categories)
layOUT<-data.frame(x=c(1,2,3,4,1,2,1,1),y=c(4,4,3,3,3,3,2,1))
l<-as.matrix(layOUT)
plot(g,layout=l)

enter image description here

user1317221_G
  • 15,087
  • 3
  • 52
  • 78
  • I know, this is more of a munging question than an igraph one, but thanks for your clarification. I still need to figure out the code to transform my dataset into a proper igraph vertex/edge list. It would be easy if I only had 6 cases and 4 categories in my dataset... :7D – dmvianna Sep 12 '12 at 11:07
  • I shall try that tomorrow at work, thanks. Will post the answer if I find it. – dmvianna Sep 12 '12 at 11:15