0

I have example data as follows:

# Build a 100-row example data.table. The seed is fixed so the random draws
# below are reproducible. The longest columns (Country, Group, Time, wt, Age)
# have 100 elements; the shorter ones are recycled by data.table().
library(data.table)
set.seed(1)
DT <- data.table(panelID = sample(50,50),                                                    # Panel ID: a random permutation of 1:50
                      Country = c(rep("Albania",30),rep("Belarus",50), rep("Chilipepper",20)),       
                      # NOTE(review): the next two columns have length 6, which does not
                      # divide 100 evenly; data.table() may warn about recycling with a
                      # remainder here. Confirm this is intended.
                      some_NA = sample(0:5, 6),                                             
                      some_NA_factor = sample(0:5, 6),         
                      Group = c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20)),
                      # 20 dates (one month apart, each shifted back by one day), repeated 5 times
                      Time = rep(seq(as.Date("2010-01-03"), length=20, by="1 month") - 1,5),
                      wt = 15*round(runif(100)/10,2),
                      Income = round(rnorm(10,-5,5),2),
                      Happiness = sample(10,10),
                      Sex = round(rnorm(10,0.75,0.3),2),
                      Age = sample(100,100),
                      Educ = round(rnorm(10,0.75,0.3),2))           
# NOTE(review): the link on the next line is about replacing 0 values with NA
# and appears unrelated to creating the ID; kept from the original.
DT [, uniqueID := .I]                                                                        # Unique ID: the row number                                                                                # https://stackoverflow.com/questions/11036989/replace-all-0-values-to-na
# Store some_NA_factor as a factor.
DT$some_NA_factor <- factor(DT$some_NA_factor)
# Store Group as character, so the column holds the numbers 1-5 as text.
DT$Group <- as.character(DT$Group)
# Second table made with data.table's copy(), so DT2 can be modified separately from DT.
DT2 <- copy(DT)

This is what I want to do: convert a column (in this case column 5, Group) to numeric if that is possible.

# Names of the tables to process; each is looked up by name with get() below.
dfs <- c("DT", "DT2")
# TRUE when z is a character vector and more than 90% of its elements match a
# number-like pattern: optional leading spaces, an optional minus sign, one or
# more digits and/or dots, and an optional exponent such as "e5" or "e-5".
conv_to_num_check <- function(z) is.character(z) && (mean(grepl("^ *-?[\\d.]+(?:e-?\\d+)?$", z, perl = TRUE), na.rm=TRUE)>0.9)

# For each index i that the loop visits: find the columns of the table named
# dfs[i] that pass conv_to_num_check, then overwrite them with their
# as.numeric() conversion using data.table's := assignment.
for (i in length(dfs)) {
  cols <- which(sapply(get(dfs[i]), conv_to_num_check))
  setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
}

But when I check the class:

class(DT$Group) # Still "character" after running the loop above

When I do:

# The same two steps as in the loop body, applied to DT directly instead of
# looking the table up by name with get().
cols <- which(sapply(DT, conv_to_num_check))
setDT(DT)[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
class(DT$Group) # Is numeric

It works. What am I doing wrong?

Tom
  • 2,173
  • 1
  • 17
  • 44

1 Answer

2

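Just a tiny error in the line `for (i in length(dfs))`: `length(dfs)` evaluates to the single number 2, so the loop body runs only once, with i = 2. Only DT2 is converted and DT is never touched: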

# dfs has two elements, so length(dfs) is the single value 2 and the loop
# body runs exactly once, with i = 2 (it never visits i = 1).
for (i in length(dfs)) {
    print(i)
}
# [1] 2

It will work if you change it to:

# Convert number-like character columns to numeric in every table named in
# dfs. seq_along(dfs) visits each index (1, 2, ...), unlike length(dfs),
# which yields only the last one.
for (i in seq_along(dfs)) {
  # vapply() always returns a logical vector (even for a table with no
  # columns), whereas sapply() can return a list that which() cannot handle.
  is_num_like <- vapply(get(dfs[i]), conv_to_num_check, logical(1))
  cols <- which(is_num_like)
  # Skip the assignment when no column qualifies, so := is never called with
  # an empty left-hand side.
  if (length(cols) > 0) {
    # get() returns the table itself and := modifies it by reference, so the
    # change is visible through the original name (DT, DT2).
    setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
  }
}
r2evans
  • 141,215
  • 6
  • 77
  • 149
SamR
  • 8,826
  • 3
  • 11
  • 33
  • For clarity: the `for` loop *returns* nothing, try `aa <- for (i in 1) {print(i);}` and see that `aa` is `NULL`. The `for` loop is *printing* `2` here, not returning it. (Your premise of using `seq_along` is, of course, spot-on.) – r2evans Apr 18 '22 at 11:57
  • Right yes I meant `length(dfs)` returns `2`, rather than the loop returns it, but I suppose that was a bit unclear, so thanks for the clarification. – SamR Apr 18 '22 at 12:06
  • 1
    It's pedantic, I recognize, but it is a common-enough problem with `for` loops that I think it's worth changing the language for follow-on readers. There is no return, yet new R users tend to think that it does. – r2evans Apr 18 '22 at 12:19