I've got a really weird problem with a matrix of doubles turning into a matrix of strings. I cut down my code to the following problem:
num.samples <- nrow(expr.matrix)
num.genes <- ncol(expr.matrix)
gene.names <- colnames(expr.matrix)
# Define a function which returns a vector in order to...
execute.per.gene <- function(target.gene, ...) {
# Uninteresting code
x <- expr.matrix[,setdiff(1:num.genes, target.gene)]
y <- expr.matrix[,target.gene]
rf <- randomForest(x, y, mtry=10, ntree=100, importance=TRUE)
# Calculate importance measure
im <- importance(rf)[,"IncNodePurity"]
# Divide by number of samples
im / num.samples
}
# ... execute mclapply!
all.output <- mclapply(1:num.genes, execute.per.gene, mc.cores=mc.cores)
# Initialise matrix
weight.matrix <- matrix(0.0, nrow=num.genes, ncol=num.genes)
rownames(weight.matrix) <- gene.names
colnames(weight.matrix) <- gene.names
# And now I merge the results from 'all.output' into the weight.matrix
for (target.gene in 1:num.genes) {
# Get result
im <- all.output[[target.gene]]
# Find which rows to change for this column
cand.tf.idx <- match(names(im), gene.names)
# Merge results into output matrix
weight.matrix[cand.tf.idx, target.gene] <- im
}
# And suddenly, the matrix consists of a bunch of strings!
if (!is.numeric(weight.matrix[[1,1]])) { # dafuq
cat("\nEncountered strings! :/\n")
print(weight.matrix)
# recently added this for debugging purposes:
print(sapply(all.output, class))
browser()
}
The output is:
Encountered strings! :/
G1 G7 G9 G23 G26 G28 G29 G33 G44 G48 G50 G52 G55 G59 G63 G64 G69 G70
G1 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G7 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G9 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G23 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
G26 "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
...
And I have no clue why the matrix is turning into a bunch of strings containing zeros. After all, in the execute.per.gene
method, the last thing I do is divide the vector by an integer and the problem doesn't throw an exception at that point, so therefore im
must be still a vector of doubles at that point.
Does anybody see where the problem lies? What am I doing wrong?
Update, im
should always look somewhat like the following. If im
already consisted of strings, I figure im / num.samples
would fail.
> dput(im)
structure(c(3.86421872658217, 0.0600404651226161, 0.0729843866848986,
0.0556398483535666, 0.0488815568218319, 0.0526059937835038, 0.170688282373908,
0.129655447072086, 0.174050696716209, 0.244770969072866, 0.170282014024477,
0.100440545265572, 0.0634773436494396, 0.0696835665372604, 0.118303002740336,
0.0493612110879677, 0.103414668075989, 0.0149516634700066, 0.0377397612656266,
0.0462366818296757, 0.0534595079995701, 0.0418429987271517, 0.0521335103883387,
0.0454590053400778, 0.0620792864477719, 0.0528642019860386, 0.0440233200010488,
....
2.4293680818691, 0.0455845647048088, 0.0480473721971548, 0.0493418345253576,
0.0468391879447859, 1.53509517636789, 0.0639471582428624, 0.155340800410008,
0.0668494853135931, 0.0436381864919185, 1.09024170028797, 0.0649503734307499,
0.0490042073829033, 0.0304435411561372, 0.034892331733943, 0.0759421587532521,
0.0666974014679768, 0.913196971375135, 0.0550660353121449, 1.36191204205922,
3.63194611493454, 0.177078251458191, 0.17856008667256, 0.0499985787306069,
0.0465138307009715, 0.071656156183379, 0.0441178391009568, 0.239933902772204,
0.0719828575374175, 0.0654148345872996, 0.920668929212975, 0.0454979263784418,
2.92899170564573, 0.0208273505572265, 0.0397416566013167, 0.197310579354446,
0.0313568556466712), .Names = c("sample_2", "sample_3", "sample_4",
"sample_5", "sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
"sample_41", "sample_42", "sample_43", "sample_44", "sample_45",
"sample_46", "sample_47", "sample_48", "sample_49", "sample_50",
"sample_51", "sample_52", "sample_53", "sample_54", "sample_55",
"sample_56", "sample_57", "sample_58", "sample_59", "sample_60",
"sample_61", "sample_62", "sample_63", "sample_64", "sample_65",
"sample_66", "sample_67", "sample_68", "sample_69", "sample_70",
"sample_71", "sample_72", "sample_73", "sample_74", "sample_75",
....
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
))
And more information on gene.names
, num.samples
and num.genes
:
> dput(num.samples)
1643L
> dput(num.genes)
805L
> dput(gene.names)
c("sample_1", "sample_2", "sample_3", "sample_4", "sample_5",
"sample_6", "sample_7", "sample_8", "sample_9", "sample_10",
"sample_11", "sample_12", "sample_13", "sample_14", "sample_15",
"sample_16", "sample_17", "sample_18", "sample_19", "sample_20",
"sample_21", "sample_22", "sample_23", "sample_24", "sample_25",
"sample_26", "sample_27", "sample_28", "sample_29", "sample_30",
"sample_31", "sample_32", "sample_33", "sample_34", "sample_35",
"sample_36", "sample_37", "sample_38", "sample_39", "sample_40",
...
"sample_741", "sample_742", "sample_743", "sample_744", "sample_745",
"sample_746", "sample_747", "sample_748", "sample_749", "sample_750",
"sample_751", "sample_752", "sample_753", "sample_754", "sample_755",
"sample_756", "sample_757", "sample_758", "sample_759", "sample_760",
"sample_761", "sample_762", "sample_763", "sample_764", "sample_765",
"sample_766", "sample_767", "sample_768", "sample_769", "sample_770",
"sample_771", "sample_772", "sample_773", "sample_774", "sample_775",
"sample_776", "sample_777", "sample_778", "sample_779", "sample_780",
"sample_781", "sample_782", "sample_783", "sample_784", "sample_785",
"sample_786", "sample_787", "sample_788", "sample_789", "sample_790",
"sample_791", "sample_792", "sample_793", "sample_794", "sample_795",
"sample_796", "sample_797", "sample_798", "sample_799", "sample_800",
"sample_801", "sample_802", "sample_803", "sample_804", "sample_805"
)