Problem statement
I have this function daeqtl_mapping_()
that takes in three data tables: snp_pairs
, zygosity
and ae
.
I am iterating over each row of snp_pairs
, and updating it with two new columns.
The data tables zygosity
and ae
are only read and not modified at all.
I'd like to know if there is a way of parallelizing the for-loop while sharing these three data tables, i.e. not copy them over to threads (which is, I think, what e.g. parallel::mclapply()
would do).
Function daeqtl_mapping()
daeqtl_mapping_ <- function(snp_pairs, zygosity, ae) {
# Ensure that the data tables are all keyed.
# `snp_pairs` keys: first two columns
data.table::setkeyv(snp_pairs, colnames(snp_pairs)[1:2])
# `zygosity` keys: first column
data.table::setkeyv(zygosity, colnames(zygosity)[1])
# `ae` keys: first column
data.table::setkeyv(ae, colnames(ae)[1])
n <- nrow(snp_pairs)
for(i in seq_len(n)) {
candidate_snp <- snp_pairs[[i, 'candidate_snp']]
dae_snp <- snp_pairs[[i, 'dae_snp']]
if(dae_snp %is_not_in% snp_pairs) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
if(dae_snp %is_not_in% ae) {
datatable::set(snp_pairs, i = i, j = 'pvalue', value = NA)
datatable::set(snp_pairs, i = i, j = 'case', value = 0L)
break
}
# `csnp_hom`: lgl indicating homozygous samples for the candidate snp
csnp_hom <- is_hom(dt = zygosity, snp = candidate_snp)
# `csnp_het`: lgl indicating heterozygous samples for the candidate snp
csnp_het <- is_het(dt = zygosity, snp = candidate_snp)
# `dsnp_het`: lgl indicating heterozygous samples for the dae snp
dsnp_het <- is_het(dt = zygosity, snp = dae_snp)
# `ae_hom`: dbl with allelic ratios of homozygous samples (candidate snp)
ae_hom <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_hom, with = FALSE])
# `ae_het`: dbl with allelic ratios of heterozygous samples (candidate snp)
ae_het <- as.numeric(ae[dae_snp][, 1 := NULL][, dsnp_het & csnp_het, with = FALSE])
df <- daeqtl_test(ae_hom = ae_hom, ae_het = ae_het)
for (col in names(df)) data.table::set(snp_pairs, i = i, j = col, value = df[[col]])
}
}
MWE
Please first install daeqtlr with remotes::install_github("maialab/daeqtlr")
.
library(daeqtlr)
# Download the RDS files from this google drive folder
# https://drive.google.com/drive/folders/1lefDRPW2W6jBMnwUQIy4VGD9ssks4R-E?usp=sharing
snp_pairs <- data.table::setDT(readRDS(file = '~/dwl/snp_pairs.rds'))
zygosity <- data.table::setDT(readRDS(file = '~/dwl/zygosity.rds'))
ae <- data.table::setDT(readRDS(file = '~/dwl/ae.rds'))
daeqtlr:::daeqtl_mapping_(snp_pairs, zygosity, ae)
snp_pairs
#> dae_snp candidate_snp chromosome dae_snp_position
#> 1: rs10035235 rs1001420 5 40798542
#> 2: rs10035235 rs1001684 5 40798542
#> 3: rs10035235 rs1002424 5 40798542
#> 4: rs10035235 rs1002922 5 40798542
#> 5: rs10035235 rs10043093 5 40798542
#> ---
#> 1482: rs10035418 rs9324855 5 141437664
#> 1483: rs10035418 rs9324858 5 141437664
#> 1484: rs10035418 rs9324859 5 141437664
#> 1485: rs10035418 rs9686896 5 141437664
#> 1486: rs10035418 rs998794 5 141437664
#> candidate_snnp_position pvalue case
#> 1: 41000241 0.1155707 4
#> 2: 40810324 0.4341583 4
#> 3: 40767295 0.2739109 4
#> 4: 40386453 0.9556968 4
#> 5: 40324118 0.3911479 4
#> ---
#> 1482: 141693599 0.9098420 4
#> 1483: 141884896 0.8291068 4
#> 1484: 141899785 0.7482426 4
#> 1485: 141731340 0.9556116 4
#> 1486: 141187135 0.6369524 4
Created on 2022-04-26 by the reprex package (v2.0.1)