Converting factors to numeric has been dealt with multiple times, but my issue is when I have multiple numbers within the factor. For instance, here is a small subset of my data.frame:
                    AF      AC   AN   EAS_AF         AMR_AF
1          0.000199681       1 5008    0.001            0.0
2           0.00319489      16 5008      0.0            0.0
3 0.024361, 0.00479233 122, 24 5008 0.0, 0.0 0.0043, 0.0014
4           0.00439297      22 5008      0.0         0.0014
5          0.000798722       4 5008      0.0            0.0
Normally I would use the `as.numeric` and `levels` functions in combination to convert these factors into numbers. However, row three has two numbers in each entry, and therefore I get an NA when attempting this method on these variables. Is there any way to get around this? I have too many such cases to pluck them out manually.
My overall objective is to test whether each entry in each of these columns is greater than 0 (so if there are two numbers, I would test both), which is why I am attempting to convert into numeric in the first place. If there is any other smarter way around this problem I'd be willing to try it.
As requested, below is the `dput` output of a reduced version of my data frame (taking only the first 9 rows).
# Reproducible example data: dput() output that rebuilds the data frame
# described above (9 rows, 17 columns).
# CHROM, POS and AN are integer vectors; every other column is a factor.
# In row 3 the factor columns ALT, AF, AC, EAS_AF, AMR_AF, AFR_AF, EUR_AF and
# SAS_AF hold a level made of two comma-separated values (e.g.
# "0.024361, 0.00479233"), which is why as.numeric() on the levels gives NA.
# The expression is not assigned to a name here; use `df <- structure(...)`
# to keep the result.
structure(list(CHROM = c(10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L), POS = c(180109L, 209892L, 221335L, 239445L, 246927L, 246928L,
246933L, 246955L, 246970L), ID = structure(c(6L, 4L, 1L, 3L,
5L, 9L, 2L, 7L, 8L), .Label = c("rs143013573", "rs1431845", "rs145483680",
"rs151111729", "rs547339499", "rs547699134", "rs556577288", "rs575589407",
"rs72770983"), class = "factor"), REF = structure(c(3L, 2L, 2L,
3L, 1L, 1L, 3L, 2L, 1L), .Label = c("A", "C", "G"), class = "factor"),
ALT = structure(c(1L, 2L, 3L, 1L, 2L, 2L, 1L, 4L, 2L), .Label = c("A",
"G", "G, T", "T"), class = "factor"), AF = structure(c(1L,
5L, 7L, 6L, 2L, 4L, 8L, 3L, 1L), .Label = c("0.000199681",
"0.000798722", "0.000998403", "0.00239617", "0.00319489",
"0.00439297", "0.024361, 0.00479233", "0.220248"), class = "factor"),
AC = structure(c(1L, 5L, 4L, 6L, 7L, 3L, 2L, 8L, 1L), .Label = c("1",
"1103", "12", "122, 24", "16", "22", "4", "5"), class = "factor"),
AN = c(5008L, 5008L, 5008L, 5008L, 5008L, 5008L, 5008L, 5008L,
5008L), EAS_AF = structure(c(3L, 1L, 2L, 1L, 1L, 3L, 4L,
1L, 1L), .Label = c("0.0", "0.0, 0.0", "0.001", "0.248"), class = "factor"),
AMR_AF = structure(c(1L, 1L, 3L, 2L, 1L, 2L, 4L, 1L, 2L), .Label = c("0.0",
"0.0014", "0.0043, 0.0014", "0.1599"), class = "factor"),
AFR_AF = structure(c(1L, 3L, 5L, 4L, 2L, 1L, 6L, 1L, 1L), .Label = c("0.0",
"0.003", "0.0121", "0.0159", "0.09, 0.0", "0.1611"), class = "factor"),
EUR_AF = structure(c(1L, 1L, 2L, 1L, 1L, 3L, 4L, 1L, 1L), .Label = c("0.0",
"0.0, 0.0089", "0.0089", "0.2495"), class = "factor"), SAS_AF = structure(c(1L,
1L, 2L, 1L, 1L, 3L, 5L, 4L, 1L), .Label = c("0.0", "0.0, 0.0143",
"0.001", "0.0051", "0.2843"), class = "factor"), consequence = structure(c(2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("stop_gained",
"synonymous_variant"), class = "factor"), gene = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "ZMYND11", class = "factor"),
accession = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "NM_006624.5", class = "factor"), gene_type = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "protein_coding", class = "factor")), .Names = c("CHROM",
"POS", "ID", "REF", "ALT", "AF", "AC", "AN", "EAS_AF", "AMR_AF",
"AFR_AF", "EUR_AF", "SAS_AF", "consequence", "gene", "accession",
"gene_type"), class = "data.frame", row.names = c(NA, -9L))