I am running an R script to analyze some biological data. Example of the snippet data and script is provided below. This data file has many columns (but I would like to focus on 5th column- Gene). I have more than 100 data files like this (consider 5th Gene column in all files as the column of interest). Currently I am running each file separately in R and the process is tedious. I would like to run a R script for all data files simultaneously and save all the data in different folder as per the file name. Is it possible to loop or iterate and analyze all the data files at once in a script. Please assist me with this.
Thank you,
Toufiq
Formatted question and Revised dataframe
Inserted: Names of the files to be read
M3.1_IPA.txt
M8.1_IPA.txt
M8.2_IPA.txt
M8.3_IPA.txt
1. Load gene sets to analyze
Data_file <- read.delim(file = "./M3.1_IPA.txt", stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL)
dput(head(Data_file, 5))
structure(list(row.names = c("M3.1_ALPP", "M3.1_ALS2CR14", "M3.1_ANKRD30B",
"M3.1_ARL16", "M3.1_BCYRN1"), X = 1:5, Module = c("M3.1", "M3.1",
"M3.1", "M3.1", "M3.1"), Title = c("Cell Cycle", "Cell Cycle",
"Cell Cycle", "Cell Cycle", "Cell Cycle"), Gene = c("ALPP", "ALS2CR14",
"ANKRD30B", "ARL16", "BCYRN1"), Probes = c("ILMN_1693789", "ILMN_1787314",
"ILMN_1730678", "ILMN_2188119", "ILMN_1678757"), Module_gene = c("M3.1_ALPP",
"M3.1_ALS2CR14", "M3.1_ANKRD30B", "M3.1_ARL16", "M3.1_BCYRN1"
)), row.names = c(NA, 5L), class = "data.frame")
Module_10_1 <- Data_file[,5]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
Select motif database to use (i.e. organism and distance around TSS)
data(motifAnnotations_hgnc) motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
Calculate enrichment
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1) Type = "Enrichment" Module = "M10_1" auc <- getAUC(motifs_AUC) ##Export the plots## pdf(paste0(Type, "_", Module, "_AUC_Histogram_Plot.pdf"), height = 5, width = 10) hist(auc, main="Module_10_1", xlab="AUC histogram", breaks=100, col="#ff000050", border="darkred") nes3 <- (3*sd(auc)) + mean(auc) abline(v=nes3, col="red") dev.off()
Select significant motifs and/or annotate to TFs
motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3, motifAnnot=motifAnnotations_hgnc) ##Export the Motif enrichment analysis results### write.csv(motifEnrichmentTable, file ="./motifEnrichmentTable.csv", row.names = F, sep = ",")
Identify the genes with the best enrichment for each Motif
motifEnrichmentTable_wGenes_v1 <- addSignificantGenes(motifEnrichmentTable, rankings=motifRankings, geneSets=Module_10_1_geneLists) ##Export the Motif enrichment analysis results## write.csv(motifEnrichmentTable_wGenes_v1, file ="./motifEnrichmentTable_wGenes_v1.csv", row.names = F, sep = ",")
Plot for a few motifs
geneSetName <- "Module_10_1_Genelist" selectedMotifs <- c("cisbp__M6275", sample(motifEnrichmentTable$motif, 2)) par(mfrow=c(2,2)) Type_v1 <- "Few_motifs" ##Export the plots pdf(paste0(Type_v1, "_", Module, "_Sig_Genes_Plot.pdf"), height = 5, width = 5) getSignificantGenes(Module_10_1_geneLists, motifRankings, signifRankingNames=selectedMotifs, plotCurve=TRUE, maxRank=5000, genesFormat="none", method="aprox") dev.off()
The final output of RcisTarget is a data.table and export to html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1) resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,] Type_v2 <- "Motifs_Table" library(DT) ## export the data table to html file format### dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE], escape = FALSE, # To show the logo filter="top", options=list(pageLength=100)) html_test <- "dtable.html" saveWidget(dtable, html_test)
Building a network and Export to the html format
signifMotifNames <- motifEnrichmentTable$motif[1:3] incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists, motifRankings, signifRankingNames=signifMotifNames, plotCurve=TRUE, maxRank=5000, genesFormat="incidMatrix", method="aprox")$incidMatrix library(reshape2) edges <- melt(incidenceMatrix) edges <- edges[which(edges[,3]==1),1:2] colnames(edges) <- c("from","to") library(visNetwork) motifs <- unique(as.character(edges[,1])) genes <- unique(as.character(edges[,2])) nodes <- data.frame(id=c(motifs, genes), label=c(motifs, genes), title=c(motifs, genes), # tooltip shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))), color=c(rep("purple", length(motifs)), rep("skyblue", length(genes)))) Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) ##Export the network## visSave(Network, file ="Network.html")