Hi I've got a function which performs calculations on several columns (written with kind support form stack overflow community). I'd like to adapt it so that instead of calculating MOD for each iteration, it looks up a value in a table and uses that. The MOD for each column will be different.
Here is the function that does the calculations:
library(data.table)
dataDT<- data.frame(CAG = c(13, 14, 15, 17),
A01 = c(6485,35,132, 12),
A02 = c(0,42,56, 4))
thres <- 0.2
dataDT<-setDT(dataDT)
colsToBeUsed<-names(dataDT[,!'CAG'])
sumDataSetdata<-c()
sumDataSet<-unlist(lapply(X=1:length(colsToBeUsed),function(X){s=colsToBeUsed[X]
eval(parse(text=paste0('dataDT[',s,'<thres*max(',s,'),',s,':=0]')))
eval(parse(text=paste0('dataDT[,MOD',s,':=dataDT[',s,'==max(',s,'),CAG]]')))
eval(parse(text=paste0('dataDT[,norm',s,':=',s,'/sum(',s,')]')))
eval(parse(text=paste0('dataDT[,sum',s,':=',s,'/sum(',s,')*(CAG-MOD',s,'),]')))
eval(parse(text=paste0('rbind(sumDataSetdata,dataDT[,sum(sum',s,')])')))
}))
Here is the table which gives the MOD:
MODs <- data.frame(c(data.frame(samples = c('A01', 'A02', 'A03', 'A04'), MOD = c(117.8, 120.2, 124.5, 130.6))
Here is the table which says which MOD to use for each 'sample' column
ctrls <- (data.frame(samples = c('A01', 'A02', 'A03', 'A04'), ctrl = c(A01, A01, A03, A03))
Response to answer 1
Thank you, that works well for the example. I've been trying to apply it to my real data and am having a few difficulties. Here is the code for my real data.
library(data.table)
dataDT <- data.frame(area[,7:ncol(height)])
dataDT <- setDT(dataDT)
colsToBeUsed<-names(dataDT[,!'CAG'])
MODs <- data.frame(samples = samples$unique.inputdf.SampleFileName., MOD = htresults$mode)
ctrls <- data.frame(samples = samples$unique.inputdf.SampleFileName., ctrl = 'A01_RR20170609_FA_A01_2017-06-09_1.fsa')
myFun <- function(x, mod, cag, thres) {
x[x < (thres * max(x))] <- 0
norm_x <- x / sum(x)
sum_x <- norm_x * (cag - mod)
sum(sum_x)
}
transision_matrix <- merge(ctrls, MODs, by.x = "ctrl", by.y = "samples")
setDT(transision_matrix)
mf2 <- function(colname, dataDT, transision_matrix){
x <- dataDT[, colname, with = F][[1]]
mod <- transision_matrix[samples == colname, MOD]
cag <- dataDT[, "CAG"][[1]]
myFun(x, mod, cag, thres = 0.2)
}
sapply(colsToBeUsed, function(x) mf2(x, dataDT, transision_matrix))
The control sample for all columns in this experiment is A01_RR20170609_FA_A01_2017.06.09_1.fsa and its mode is 20.67000
This is the result I get
A01_RR20170609_FA_A01_2017.06.09_1.fsa A02_RR20170609_FA_A02_2017.06.09_1.fsa
0 0
A03_RR20170609_FA_A03_2017.06.09_1.fsa A04_RR20170609_FA_A04_2017.06.09_1.fsa
0 0
A05_RR20170609_FA_A05_2017.06.09_1.fsa A06_RR20170609_FA_A06_2017.06.09_1.fsa
0 0
A07_RR20170609_FA_A07_2017.06.09_1.fsa A08_RR20170609_FA_A08_2017.06.09_1.fsa
0 0
A09_RR20170609_FA_A09_2017.06.09_1.fsa A10_RR20170609_FA_A10_2017.06.09_1.fsa
0 0
A11_RR20170609_FA_A11_2017.06.09_1.fsa A12_RR20170609_FA_A12_2017.06.09_1.fsa
0 0
The results I'm expecting are:
[1] 4.108246 5.868355 4.608756 -1.159657 4.015066 4.364199 5.262355 4.337760 6.496672 5.574396
[11] 5.102111 8.911440
In case it's useful, here is info about the table 'height':
'data.frame': 660 obs. of 19 variables:
$ Dye/SamplePeak : chr "B,66" "B,67" "B,68" "B,69" ...
$ Marker : chr NA NA NA NA ...
$ Allele : chr NA NA NA NA ...
$ Size : num 144 147 148 150 151 ...
$ Area : num 148288 110 907 3355 1274 ...
$ DataPoint : num 2591 2622 2641 2655 2671 ...
$ CAG : num 13.9 14.9 15.5 15.9 16.4 ...
$ A01_RR20170609_FA_A01_2017-06-09_1.fsa: num 6485 32 125 450 211 ...
$ A02_RR20170609_FA_A02_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A03_RR20170609_FA_A03_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A04_RR20170609_FA_A04_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A05_RR20170609_FA_A05_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A06_RR20170609_FA_A06_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A07_RR20170609_FA_A07_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A08_RR20170609_FA_A08_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A09_RR20170609_FA_A09_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A10_RR20170609_FA_A10_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A11_RR20170609_FA_A11_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
$ A12_RR20170609_FA_A12_2017-06-09_1.fsa: num 0 0 0 0 0 0 0 0 0 0 ...
The table 'height' is quite large, but I've added the results from column 'CAG' and 'A01_RR20170609_FA_A01_2017.06.09_1.fsa' in case it's useful for working.
**CAG**
[1] 13.92667 14.88000 15.46333 15.89333 16.38333 16.84333 17.39667 17.79667 18.41000 18.78000 19.39333
[12] 19.76000 20.33667 20.67000 21.03333 21.36667 23.52000 24.45667 25.39667 27.31000 28.34667 29.32000
[23] 30.23333 31.17667 32.15000 32.72667 33.08333 33.67667 34.03667 34.60667 34.96667 35.54000 35.93333
[34] 36.54000 36.87333 37.49333 37.87333 38.50667 38.89000 39.48333 39.84667 40.45000 40.81000 41.41000
[45] 41.73667 42.45333 42.77667 43.34000 43.69333 46.21000 49.21333 52.21667 52.60667 13.90000 14.95000
[56] 15.47333 15.99667 16.46000 16.86333 17.48000 17.85000 18.43667 18.80667 19.42667 19.76333 20.43667
[67] 21.38667 21.72333 22.70000 23.31333 23.61667 24.23000 24.59667 25.60333 26.55333 27.16333 27.44000
[78] 28.42000 29.40000 30.37667 30.98667 31.32333 32.30000 33.23667 34.16667 35.13333 36.07667 37.05667
[89] 38.03333 38.63333 39.01667 39.60667 39.97333 40.57667 40.94000 41.54333 41.87333 42.47333 42.83333
[100] 43.46000 43.78667 44.70333 12.48333 13.03667 13.92667 14.88000 15.49333 15.86333 16.44667 16.78333
[111] 17.36667 17.82667 18.38000 18.84333 19.42333 19.79000 20.34000 20.79667 21.28333 21.74333 23.60000
[122] 24.54000 25.51333 26.48667 27.43333 28.40667 29.32000 29.96000 30.26333 30.90333 31.36000 32.24000
[133] 33.17333 34.25667 35.40667 36.01667 36.56667 36.99667 37.58667 37.97000 38.60333 38.98333 39.57667
[144] 39.94000 40.54667 40.90667 41.51000 41.86667 42.43667 42.79333 43.38667 43.74000 12.63667 13.95667
[155] 15.52333 15.92333 16.53667 16.90667 17.49000 17.89000 18.44333 18.81000 19.42333 19.79000 20.40000
[166] 20.76667 21.37667 21.74333 22.38000 24.54000 25.51333 27.46333 28.43667 29.04667 29.38000 30.29333
[177] 31.26667 32.24000 33.17333 34.10667 35.04333 35.68000 35.98667 36.59667 36.96333 37.55667 37.97000
[188] 38.60333 38.95333 39.57667 39.94000 40.51333 40.87333 41.47333 41.83333 42.43000 42.75667 43.35000
[199] 43.70667 44.29667 44.65000 45.24000 45.56000 46.54333 47.54333 12.45333 13.04000 13.87333 14.86667
[210] 15.48667 15.86000 16.44667 16.79000 17.37667 17.81333 18.43333 18.80667 19.42667 19.79667 20.41333
[221] 20.75000 21.36667 21.73333 22.59667 23.54667 24.53000 25.51000 26.46000 27.44000 28.38667 29.00000
[232] 29.33667 29.91667 30.28333 31.26333 31.84333 33.08667 34.08000 34.68667 35.08333 35.97333 36.37333
[243] 36.96000 37.59000 37.94667 38.59000 38.98000 39.57667 39.94333 40.55000 40.88333 41.48667 41.85000
[254] 42.42000 42.77667 43.37333 43.73000 44.29333 44.64667 45.26667 45.56000 12.57667 12.94000 13.55333
[265] 13.92000 14.50000 14.86667 15.41667 15.87667 16.51667 16.88667 17.49667 17.86333 18.47667 18.81333
[276] 19.42333 19.79000 20.40000 20.76667 21.40667 21.74333 23.60000 27.42667 28.94333 29.76333 30.33667
[287] 32.21333 33.17000 34.09333 34.57333 35.05667 35.69000 36.05333 36.69000 36.99667 37.58667 37.97000
[298] 38.60333 38.98333 39.60333 39.96333 40.56333 40.92000 41.48667 41.84333 42.43333 42.78667 43.40667
[309] 43.70000 12.05333 12.51333 12.91333 13.90000 14.48667 14.85667 15.47667 15.78333 16.40000 16.83333
[320] 17.48000 17.88333 18.47000 18.81000 19.39333 19.79000 20.40000 20.76667 21.34667 21.71000 22.26000
[331] 22.62333 23.56667 24.57333 26.43000 27.99333 28.66667 29.40000 31.29333 32.30000 33.17000 34.09000
[342] 34.69000 35.08000 35.65333 36.02000 36.23000 36.93333 37.52667 37.94667 38.59000 38.94667 39.57667
[353] 39.94000 40.54333 40.90333 41.47333 41.83333 42.42667 42.78333 43.40333 43.72667 52.24333 52.57667
[364] 12.02667 12.30000 13.73667 14.99333 15.57333 15.94000 16.52333 16.92000 17.44000 17.96000 18.44667
[375] 18.81333 19.45333 19.81667 20.42000 20.78333 21.35667 21.66000 22.41333 24.52333 25.52000 26.45333
[386] 27.42667 28.06333 28.33333 28.94000 29.36333 29.91000 30.30333 30.88000 31.24333 31.85000 32.18333
[397] 33.14333 33.77000 34.13000 34.70000 35.03000 35.66333 36.02667 36.63333 37.00000 37.58333 37.99000
[408] 38.58333 38.96000 39.57333 39.93333 40.53000 40.88667 41.48000 41.83667 42.43000 42.78333 43.37000
[419] 43.69333 44.36667 44.69000 45.27333 45.65000 46.24000 46.54000 50.86667 51.52667 12.42333 13.03333
[430] 13.85667 14.47000 14.86667 15.47667 15.84667 16.39667 16.70000 17.40667 17.80333 19.21000 19.79000
[441] 20.18667 20.73667 21.31667 21.71000 22.20000 22.59333 23.56667 24.51000 25.48333 26.42667 27.03333
[452] 27.33667 28.33667 29.30667 29.94333 30.91333 31.91000 32.78667 33.64667 34.00333 34.63333 34.99333
[463] 35.59667 35.99333 36.54000 36.93667 37.52333 37.90333 38.54000 38.92000 39.54667 39.90667 40.51000
[474] 40.84000 41.44000 41.79667 42.39333 42.74667 43.31000 43.69333 44.22000 52.21667 52.60667 12.27000
[485] 13.79333 14.95000 15.44000 16.01667 16.44333 16.84000 17.47667 17.84333 18.45000 18.81667 19.42333
[496] 19.69667 20.12000 20.48000 21.38667 21.68667 22.59333 23.52667 24.52333 25.12667 25.45667 26.45667
[507] 26.97333 27.39667 28.33667 28.97333 29.36667 29.91333 30.27667 31.24667 31.82000 32.18333 33.14000
[518] 34.06000 34.65667 35.01667 35.61333 35.97333 36.54667 36.94000 37.52000 37.92667 38.55333 38.92667
[529] 39.51333 39.87000 40.46667 40.82000 41.41333 41.76667 42.35667 42.71000 43.29667 43.64667 44.23000
[540] 44.58000 45.21667 45.53667 46.15000 46.51000 54.90333 12.13000 12.51667 13.81333 14.84000 15.44333
[551] 15.89333 16.74000 17.37333 17.76333 18.33667 18.73000 19.30333 19.60333 20.37667 21.30333 21.63000
[562] 22.52333 23.50667 24.48667 25.46667 26.38667 26.92333 27.34000 27.90667 28.29333 29.24333 30.22667
[573] 30.79333 31.24000 31.83333 32.43000 32.93000 33.46000 34.02333 34.67333 34.97000 35.53667 35.92333
[584] 36.52333 36.88333 37.48667 37.85667 38.50333 38.87333 39.45333 39.83667 40.42667 40.81000 41.36667
[595] 41.71667 42.33000 42.68000 43.26333 43.61000 44.56333 48.30000 12.46000 12.84667 13.83333 14.43000
[606] 14.82000 15.35667 15.65667 16.40333 17.42000 17.78000 18.55667 18.88333 19.30333 19.69000 20.31333
[617] 20.64000 21.23333 21.61667 22.21000 22.47667 23.45667 24.04667 24.40333 24.99333 25.35000 25.94000
[628] 26.29667 26.92000 27.27667 27.84000 28.25667 28.85000 29.20333 30.15333 31.13000 31.72000 32.10667
[639] 32.69667 33.04333 33.91667 34.91000 35.91000 36.44333 36.79667 37.39333 37.79333 38.41000 38.78000
[650] 39.39333 39.77333 40.36000 40.71000 41.29000 41.64000 42.24667 42.59333 43.54667 44.49000 45.40333
A01_RR20170609_FA_A01_2017.06.09_1.fsa
[1] 6485 32 125 450 211 703 553 1549 1360 3526 5028 13610 15986 31233 713 1260
[17] 31 37 33 46 43 48 40 63 78 33 118 40 176 65 296 103
[33] 501 242 923 545 2006 1355 4348 2564 8615 3886 12985 227 669 85 61 57
[49] 103 32 42 50 64 0 0 0 0 0 0 0 0 0 0 0
[65] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[97] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[113] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[129] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[145] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[161] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[177] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[193] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[209] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[225] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[241] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[257] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[273] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[289] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[305] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[321] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[337] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[353] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[369] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[385] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[401] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[417] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[433] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[449] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[465] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[481] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[497] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[513] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[529] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[545] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[561] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[577] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[609] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[625] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[641] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[657] 0 0 0 0
Response to answer 2 dputs as requested
dput(transision_matrix)
structure(list(ctrl = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "A01_RR20170609_FA_A01_2017-06-09_1.fsa", class = "factor"),
samples = structure(1:12, .Label = c("A01_RR20170609_FA_A01_2017-06-09_1.fsa",
"A02_RR20170609_FA_A02_2017-06-09_1.fsa", "A03_RR20170609_FA_A03_2017-06-09_1.fsa",
"A04_RR20170609_FA_A04_2017-06-09_1.fsa", "A05_RR20170609_FA_A05_2017-06-09_1.fsa",
"A06_RR20170609_FA_A06_2017-06-09_1.fsa", "A07_RR20170609_FA_A07_2017-06-09_1.fsa",
"A08_RR20170609_FA_A08_2017-06-09_1.fsa", "A09_RR20170609_FA_A09_2017-06-09_1.fsa",
"A10_RR20170609_FA_A10_2017-06-09_1.fsa", "A11_RR20170609_FA_A11_2017-06-09_1.fsa",
"A12_RR20170609_FA_A12_2017-06-09_1.fsa"), class = "factor"),
MOD = c(20.67, 20.67, 20.67, 20.67, 20.67, 20.67, 20.67,
20.67, 20.67, 20.67, 20.67, 20.67)), .Names = c("ctrl", "samples",
"MOD"), row.names = c(NA, -12L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x10180d178>, index = structure(integer(0), "`__samples`" = integer(0)))
> dput(ctrls)
structure(list(samples = structure(1:12, .Label = c("A01_RR20170609_FA_A01_2017-06-09_1.fsa",
"A02_RR20170609_FA_A02_2017-06-09_1.fsa", "A03_RR20170609_FA_A03_2017-06-09_1.fsa",
"A04_RR20170609_FA_A04_2017-06-09_1.fsa", "A05_RR20170609_FA_A05_2017-06-09_1.fsa",
"A06_RR20170609_FA_A06_2017-06-09_1.fsa", "A07_RR20170609_FA_A07_2017-06-09_1.fsa",
"A08_RR20170609_FA_A08_2017-06-09_1.fsa", "A09_RR20170609_FA_A09_2017-06-09_1.fsa",
"A10_RR20170609_FA_A10_2017-06-09_1.fsa", "A11_RR20170609_FA_A11_2017-06-09_1.fsa",
"A12_RR20170609_FA_A12_2017-06-09_1.fsa"), class = "factor"),
ctrl = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), class = "factor", .Label = "A01_RR20170609_FA_A01_2017-06-09_1.fsa")), .Names = c("samples",
"ctrl"), row.names = c(NA, -12L), class = "data.frame")
> dput(MODs)
structure(list(samples = structure(1:12, .Label = c("A01_RR20170609_FA_A01_2017-06-09_1.fsa",
"A02_RR20170609_FA_A02_2017-06-09_1.fsa", "A03_RR20170609_FA_A03_2017-06-09_1.fsa",
"A04_RR20170609_FA_A04_2017-06-09_1.fsa", "A05_RR20170609_FA_A05_2017-06-09_1.fsa",
"A06_RR20170609_FA_A06_2017-06-09_1.fsa", "A07_RR20170609_FA_A07_2017-06-09_1.fsa",
"A08_RR20170609_FA_A08_2017-06-09_1.fsa", "A09_RR20170609_FA_A09_2017-06-09_1.fsa",
"A10_RR20170609_FA_A10_2017-06-09_1.fsa", "A11_RR20170609_FA_A11_2017-06-09_1.fsa",
"A12_RR20170609_FA_A12_2017-06-09_1.fsa"), class = "factor"),
MOD = c(20.67, 19.7633333333333, 16.7833333333333, 21.7433333333333,
16.79, 14.8666666666667, 15.7833333333333, 21.66, 16.3966666666667,
19.6966666666667, 19.6033333333333, 15.3566666666667)), .Names = c("samples",
"MOD"), row.names = c(NA, -12L), class = "data.frame")