0

I have a dataframe:

gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2, 
                            ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
                            ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)

And I create a heatmap with:

library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) + 
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  facet_grid(~panel, scales = "free" ,space = "free") +
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))

enter image description here As you can see I use facet_grid to separate my heatmap into groups based on panel value. The problem is that when I use ggplotly(pp) the column width differs from group to group and my plot seems ugly.

enter image description here

In order to fix the issue I used adapted answer of Plotly and ggplot with facet_grid in R: How to to get yaxis labels to use ticktext value instead of range value? :

library(plotly)
library(ggplot2)
library(data.table)
library(datasets) 


#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]

ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
  labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
  guides(fill = FALSE)+
  theme(panel.background = element_rect(fill = NA),
        panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
        strip.placement = "outside")



p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))


total <- 1
for (i in 2:len) {
  total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}

spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1

for (i in c('', seq(2, len))) {
  tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1

  #fix the y-axis
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''

  end <- start - spacer
  start <- start - (tick_l - 1) / total_length
  v <- c(start, end)
  #fix the size
  p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}

p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]

#fix the annotations
for (i in 3:len + 1) {
  #fix the y position
  p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
  #trim the text
  p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}

#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
  p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
  p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p

But the heatmap is still not correct: enter image description here

firmo23
  • 7,490
  • 2
  • 38
  • 114

1 Answers1

1

So first things first:

In your case I am not even sure whether a plotly heatmap is what you need. In addition you should never convert a complicated ggplot to plotly. It will fail! In 90% of cases. Try recreating your plot in plotly or whereever you want it to end up. Anything else ends up in coding hell.

I started by doing some research:

  1. Here is a good description how to create heatmaps with different colors in plotly
  2. This explains how you can create titles in subplots.

From post 1 I know that I have to create a matrix for each level in your data. So I wrote a function for that:

mymat<-as.matrix(Gene_states22[,-1:-2])

### Creates a 1-NA dummy matrix for each level. The output is stored in a list
dummy_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat[mat==x]=1
                            mymat=t(apply(mat,2,as.numeric))
                            colnames(mymat)=names_col
                            return(mymat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)

### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
text_mat<-function(mat,levels,names_col){
  mat_list<-lapply(levels,function(x){
                            mat[mat!=x]=NA
                            mat=t(mat)
                            colnames(mat)=names_col
                            return(mat)
                            })
  names(mat_list)=levels
  return(mat_list)
}
   my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))

In addition I needed colors for each level. These colors are created using some dataframe. You may write a similar (lapply-)loop here as well:

DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
colnames(DF_Color) <- NULL

lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
colnames(lowColor) <- NULL

normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
colnames(normColor) <- NULL

overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
colnames(overColor) <- NULL

In addition we need the columns in the matrix for each panel-category:

mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))

I stored this in a list as well. Next I use lapply-loop to plot. I store the values in a list and use subplot to put everything together:

library(plotly)

p_list<-lapply(1:length(mycols),function(j){
  columns<-mycols[[j]]

p<-plot_ly(
    type = "heatmap"
) %>% add_trace(
    y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
    z = my_mat_list$DF[,columns],
    xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
    colorscale = DF_Color,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'DF series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
    z = my_mat_list$low[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
    colorscale = lowColor,
    colorbar = list(
        len = 0.3,
        y = 0.3,
        yanchor = 'top',
        title = 'low series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
    z = my_mat_list$normal[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
    colorscale = normColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'normal series',
        tickvals = ''
    )
) %>% add_trace(
  y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
    z = my_mat_list$over[,columns],
    xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
    colorscale = overColor,
    colorbar = list(
        len = 0.3,
        y = 1,
        yanchor = 'top',
        title = 'over series',
        tickvals = ''
    )
 )
return(p)
})

subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
  layout(annotations = list(
 list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
  list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
)

enter image description here

POSSIBLE ISSUES:

  1. You have to become create around categories like dfgh which occur only once. If only one column is selected in R, the output is automatically transformed into a (numeric or character) vector-type. Thus maybe add an as.matrix() to all z and text arguments
  2. hover-text doesn't really work. But plotly has a good documentation there. You should be able to figure that out.
  3. You also have to specify the width in the subplot-function. That will be fiddly if you have more than 10 categories.
  4. Interactivity doesn't really work. You can't remove traces. Why? No idea. Do some research if you need it. I guess it is connected with the plot type.
  5. I recommend specifying the extend of the plot(s) in px. That might make the tiles more similar.
  6. Finally you will need some reference for the (subplot) titles and you will need to adjust the margins of your plot. Such that the titles are visible.
5th
  • 2,097
  • 3
  • 22
  • 41