I have two lists of genes and i want to represent the overlap between the lists in a venn diagram. Could anybody suggest what I am missing in the code below? I'm using the VennDiagram package and the result i get is two non intersecting circles. I have also used Venny https://bioinfogp.cnb.csic.es/tools/venny/index2.0.2.html to draw the venn and confirmed there is an overlap.
library(VennDiagram)
#select the data
dataset1 <- data.frame(as.character(v7$HGNC_Symbol)) #567 genes
dataset2 <- data.frame(as.character(test$toupper.v13.HGNC_Symbol.)) #476 genes
#rename the columns so I know the source
names(dataset1)[1] <- "Progenesis"
names(dataset2)[1] <- "BiomaRt"
#sort the dataframes alphabetically
dataset1 <- dataset1 %>% arrange(Progenesis)
dataset2 <- dataset2 %>% arrange(BiomaRt)
#create a list of numbers and join it to dataframe1
mylist <- c(1:592)
dataset1 <- data.frame(mylist, dataset1)
#create a list of numbers and join it to dataframe2
mylist <- c(1:494)
dataset2 <- data.frame(mylist, dataset2)
#join the two dataframes
v <- plyr::join(dataset1, dataset2, type='full')
#draw the Venn Diagram
Dataset1 <- as.vector(v$Progenesis)
Dataset2 <- as.vector(v$BiomaRt)
Dataset2 <- na.omit(Dataset2)
#first 20 rows of each dataset
dput(Dataset1)
c("AACS ", "AARS ", "ABCF1 ", "ACAD11 ", "ACIN1 ", "ACO1 ", "ACOX3 ",
"ACP1 ", "ACSL3 ", "ACY1A ", "ACYP1 ", "ADA ", "ADI1 ", "ADK ",
"ADSL ", "ADSS ", "AGPAT3 ", "AHSA2 ", "AK4 ", "AKAP1 ")
dput(Dataset2)
structure(c("AACS", "AARS", "AARS", "ABCF1", "ACAD11", "ACIN1",
"ACO1", "ACOX3", "ACSL3", "ACYP1", "ADA", "ADI1", "ADK", "ADSL",
"AGPAT3", "AHSA2", "AKAP1", "AKAP12", "AKR1A1", "AKR1A1"),
na.action = structure(495:592, class = "omit"))
venn.diagram(
x = list(Dataset1, Dataset2),
category.names = c("Set1" , "Set2"),
filename = 'venn_diagram.png',
output=TRUE
)
All suggestions welcome.
Thanks