3

I have this matrix called mymat with more than 70000 rows. I want to plot the key.related.sheet (samples) where the values from Num_Good_SNPs_A columns will be the X-axis and the IBS_from columns will be the Y-axis. How can I get this done in R?

    mymat<-structure(c("AOGC-02-0010:AOGC-02-0010", "AOGC-02-0010:AOGC-02-0022", 
"AOGC-02-0010:AOGC-02-0063", "AOGC-02-0010:AOGC-02-0079", "AOGC-02-0010:AOGC-02-0087", 
"AOGC-02-0010:AOGC-02-0105", "AOGC-02-0010:AOGC-02-0108", "AOGC-02-0010:AOGC-02-0112", 
"AOGC-02-0010:AOGC-02-0118", "AOGC-02-0010:AOGC-02-0161", "AOGC-02-0022:AOGC-08-0032", 
"AOGC-02-0022:AOGC-08-0054", "AOGC-02-0022:AOGC-08-0067", "AOGC-02-0022:AOGC-08-0083", 
"AOGC-02-0022:AOGC-08-0090", "AOGC-02-0022:AOGC-08-0097", "AOGC-02-0022:AOGC-08-0125", 
"AOGC-02-0022:AOGC-08-0139", "AOGC-02-0022:AOGC-08-0140", "AOGC-02-0022:AOGC-08-0154", 
"AOGC-02-0022:AOGC-08-0167", "1.12", "-0.0176", "0.0253", "0.0132", 
"-0.00835", "-0.0205", "-0.00759", "-0.0134", "-0.0351", "0.00399", 
"0.00021", "-0.0144", "0.013", "0.0035", "-0.00973", "-0.0109", 
"-0.0000367", "0.0196", "0.00304", "-0.00509", "-0.0224", "16987", 
"13360", "12837", "11836", "12097", "13016", "15128", "14564", 
"13262", "14685", "14944", "13516", "14257", "11146", "13545", 
"15112", "13164", "13343", "10284", "12705", "10599", "1.12", 
"-0.00582", "0.0171", "0.0107", "-0.0132", "-0.0214", "-0.0145", 
"0.00123", "-0.0268", "0.00374", "0.00549", "-0.0215", "0.0285", 
"0.0038", "-0.0133", "-0.0268", "-0.00474", "0.0259", "0.00451", 
"-0.0111", "-0.0234", "14152", "11228", "10739", "9903", "10134", 
"10871", "12597", "12141", "11083", "12246", "12573", "11368", 
"11988", "9410", "11420", "12699", "11084", "11197", "8642", 
"10710", "8957", "1.13", "-0.00634", "0.0192", "0.0232", "-0.0328", 
"-0.0383", "-0.0318", "-0.0104", "-0.000175", "-0.0178", "0.0283", 
"-0.0158", "0.0265", "0.00121", "-0.00154", "-0.0118", "0.00984", 
"-0.0346", "0.00538", "0.0209", "-0.0485", "7158", "5720", "5478", 
"5049", "5178", "5480", "6374", "6133", "5613", "6187", "6358", 
"5852", "6095", "4772", "5835", "6433", "5706", "5658", "4377", 
"5560", "4678", "1.15", "0.0139", "0.0155", "0.0113", "-0.0232", 
"-0.0175", "-0.00998", "-0.00379", "0.00973", "-0.0469", "0.0484", 
"0.000755", "0.0332", "0.00556", "0.0302", "-0.049", "-0.00191", 
"-0.0276", "0.00223", "0.037", "-0.0643", "3759", "2986", "2849", 
"2634", "2681", "2875", "3394", "3237", "2932", "3243", "3313", 
"3023", "3163", "2499", "3047", "3311", "2897", "2966", "2303", 
"2869", "2342"), .Dim = c(21L, 9L), .Dimnames = list(c("1", "2", 
"3", "4", "5", "6", "7", "8", "9", "10", "100", "101", "102", 
"103", "104", "105", "106", "107", "108", "109", "110"), c("key.related.sheet", 
"IBS_from:324711SNPS", "Num_Good_SNPs_A:324711SNPS", "IBS_from:266640SNPS", 
"Num_Good_SNPs_A:266640SNPS", "IBS_from:133224SNPS", "Num_Good_SNPs_A:133224SNPS", 
"IBS_from:66441SNPS", "Num_Good_SNPs_A:66441SNPS")))

Here is the code I have tried:

mydf <- as.data.frame(mymat)
rownames(mydf) <- mydf[,"key.related.sheet"] 
mydf[, grepl("IBS",colnames(mydf))] <- lapply(mydf[, grepl("IBS",colnames(mydf))], function (x){as.numeric(as.character(x))})
mydf[, grepl("Num_Good",colnames(mydf))] <- lapply(mydf[, grepl("Num_Good",colnames(mydf))], function (x){as.numeric(as.character(x))})
effCis <- grep('^IBS',names(mydf));
find.measurements <- grep("^Num_Good_SNPs", names(mydf))

#xlim <- c(1,length(find.measurements));
#xlim <- range(mydf[,find.measurements],na.rm=T);
xlim <- c(1,length(effCis))
ylim <- range(mydf[,effCis],na.rm=T);
ylim[1L] <- floor(ylim[1L]/0.1)*0.1;
ylim[2L] <- ceiling(ylim[2L]/0.1)*0.1;
yticks <- seq(ylim[1L],ylim[2L],0.1);

xticks <- seq(from = max(mydf[, find.measurements]), to = min(mydf[, find.measurements]), length.out = length(effCis))
#xticks <- seq(from = max(mydf[, find.measurements]), to = min(mydf[, find.measurements]), length.out = 7)

plot(NA,xlim=c(min(xticks), max(xticks)), ylim=ylim,xlab='Number of good SNPs used',ylab='Samples',xaxs='i',yaxs='i',axes=FALSE)
#plot(mydf[,effCis],mydf[,find.measurements])

par(mar=c(5,4,4,5)+.1)
mtext("IBS", side=4, line= 2.5)
#plot(NA,xlim=c(min(xticks), max(xticks)), ylim=ylim,xlab='Numbers of good SNPs used',ylab='IBS',xaxs='i',yaxs='i',axes=TRUE)
abline(v=xticks,col='lightgrey');
abline(h=yticks,col='lightgrey');
abline(h=0,lwd=2);


axis(side = 1, at = xticks)

##axis(2L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
#axis(4L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.7);
axis(4L,yticks,sprintf('%.1f',yticks),las=1L,font=2L,cex.axis=0.6);

hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),col=rainbow(nrow(mydf)),stringsAsFactors=F);

#cryptic.col <- data.frame(cryptic=seq_len(nrow(mydf)),col=rainbow(nrow(mydf)),stringsAsFactors=F);
#with((mydf[which(mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9),]), text(xlab ~ ylab, labels = rownames(mydf)[mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9]), pos = 4)

#rownames(mydf)[mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]]>=0.59 & mydf[,colnames(mydf)[grepl("IBS_",colnames(mydf))][1]] <= 0.9]


#hybrid.col <- data.frame(hybrid=seq_len(nrow(mydf)),stringsAsFactors=F);
splineN <- 200L;
for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri];
  col <- hybrid.col$col[ri];
  x <- xticks;
  y <- c(as.matrix(mydf[hybrid,effCis]));
  points(x,y,pch=16L,xpd=NA);
  with(spline(x,y,splineN),{
    lines(x,y,col=col,lwd=2,xpd=NA);
    localwin <- which(x > 2 & x < 3);
    tp <- which.min (abs (diff ( y [localwin]) ) );
    if (length (tp) > 0L) points (x[localwin [tp]] , y[localwin[tp]] , col = col,pch=4L);
    localwin <- which (x > 2 & x < 5);
    tp <- which.min (diff (y[localwin]));
    if (length(tp) > 0L) {
      m <- diff(y[localwin[seq(tp,len=2L)]])/diff(x[localwin[seq(tp,len=2L)]]);
      if (is.finite(m)) abline(y[localwin[tp]]-m*x[localwin[tp]],m,col=col,lty=2L);
    };
  });
};

abline(h = 0.5, lwd = 2, lty = 15 , col = "Black");
abline(h = 1.0, lwd = 2, lty = 15, col = "Black");
abline(h = 0, lwd = 2, lty = 15, col = "Black");
MAPK
  • 5,635
  • 4
  • 37
  • 88
  • 1
    @zx8754 Please see the code I have tried. However it does not correctly account for the number of SNPs (i.e. X-axis). – MAPK Jun 16 '16 at 06:24

1 Answers1

1

We can use melt from data.table to reshape it to 'long' format as there are multiple patterns in the dataset. As matrix can hold only a single class (and when there is a single character element in the matrix, the whole dataset will be character matrix), when we convert it to data.table, the columns will be all character class. The numeric columns are converted back to their original class and then use ggplot to plot the columns based on the OP's description.

library(data.table)
library(ggplot2)
library(dplyr)
melt(as.data.table(mymat), measure =  patterns("^IBS", "^Num_Good"), 
     value.name = c("IBS", "Num_Good")) %>%
.[, c("IBS", "Num_Good") := lapply(.SD, as.numeric), .SDcols = IBS:Num_Good] %>% 
.[, Grp := cut(IBS, breaks = c(-Inf, 0.3, 0.5, 1.2, Inf))] %>%
  ggplot(., aes(x = Num_Good, y = IBS, col = key.related.sheet)) + 
       geom_line() + 
       facet_wrap(~Grp)
akrun
  • 874,273
  • 37
  • 540
  • 662
  • Thanks. Is there a way we can have different colours for lines that fall at the range of IBS value of `0-0.3`, `0.3-0.5` and `0.9-1.2`? I would like a line plot instead. – MAPK Jun 16 '16 at 06:28
  • 1
    @MAPK In that case, use `geom_line` – akrun Jun 16 '16 at 06:30
  • 1
    @MAPK We can create a group with `cut` and use it – akrun Jun 16 '16 at 06:35
  • How do I get rid of the `key.related.sheet` labels on the right? It occupies most of the space in the plot. – MAPK Jun 16 '16 at 06:50
  • Another issue with the plot is it breaks the plot into two sections, one where we have lines with IBS close to zero and the other with IBS close to 1.2. Can we have everything in the same plot? – MAPK Jun 16 '16 at 06:52
  • 1
    @MAPK Try with `scale_colour_discrete(guide = FALSE)` – akrun Jun 16 '16 at 06:52
  • 1
    @MAPK It is because we used `facet_wrap`. Removing them and adding the Grp as a grouping variable should fix it, but then it will be difficult to identify. – akrun Jun 16 '16 at 06:54
  • @MAPK Does `%>% ggplot(., aes(x = Num_Good, y = IBS, col = key.related.sheet, group = Grp)) + geom_line() +scale_colour_discrete(guide = FALSE)` work for you – akrun Jun 16 '16 at 06:58
  • No it didn't. I just used `scale_colour_discrete(guide = FALSE)`, omitting `facet_wrap(~Grp)` and it just worked fine. How do I rotate the face of the plot? That is I want to keep the IBS (Y-)axis on the right instead of left? – MAPK Jun 16 '16 at 07:02
  • @MAPK Perhaps http://stackoverflow.com/questions/10063701/how-to-rotate-the-axis-labels-in-ggplot2 – akrun Jun 16 '16 at 07:15