2

I tried to create an easier way to refer to columns with the function below, by allowing both indexes and names. See also link.

So this one works:

# Example table: a single row with ten columns named a..j.
df <- data.table::fread("a b c d e f g h i j
                         1 2 3 4 5 6 7 8 9 10",
                                               header = TRUE)
# c() coerces the mix of numbers and strings to one character vector, so the
# positions reach the functions below as strings such as "1".
columns <- c(1:8, "i", 9, "j")


# Turn a mixed vector of column positions and column names into positions.
# NOTE: `names(df)==columns[is.na(nums)]` is an element-wise comparison that
# recycles the shorter vector; a name is only found when it happens to line up
# with its own position in names(df). With c("i", "j") recycled over a..j,
# "i" meets position 9 and "j" meets position 10, so this input works.
col2num <- function(df, columns){
              # Entries that are column names become NA here (with a warning).
              nums <- as.numeric(columns)
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(nums)
            }

col2num(df, columns)
#> Warning in col2num(df, columns): NAs introduced by coercion
#>  [1]  1  2  3  4  5  6  7  8  9  9 10

And this one works too:

# Turn a mixed vector of column positions and column names into column names.
# NOTE: the name lookup compares names(df) element-wise against the recycled
# vector of requested names, so it only succeeds when each requested name
# happens to line up with its own position (true for c("i", "j") here).
col2name <- function(df, columns){
              # Entries that are column names become NA here (with a warning).
              nums <- as.numeric(columns)
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(names(df)[nums])
            }

col2name(df, columns)
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"
Warning message:
In col2name(df, columns) : NAs introduced by coercion

But when I do the following, it no longer works:

# Same function, but the names are now requested as "j" then "i".
columns <- c(1:7, "j", 8, "i")
col2name <- function(df, columns){
              nums <- as.numeric(columns)
              # c("j", "i") is recycled over a..j: position 9 ("i") is compared
              # with "j" and position 10 ("j") with "i", so nothing matches,
              # which() returns integer(0) and the assignment fails.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(names(df)[nums])
            }

col2name(df, columns)
Error in nums[is.na(nums)] <- which(names(df) == columns[is.na(nums)]) : 
replacement has length zero

Also, this one does not work:

# Same function with three names and one position.
columns <- c("a", "j", 8, "i")
col2name <- function(df, columns){
              nums <- as.numeric(columns)
              # c("a", "j", "i") is recycled over a..j; only "a" (position 1)
              # and "i" (position 9) happen to line up. Those two hits are then
              # recycled into the three NA slots, giving 1, 9, 8, 1.
              nums[is.na(nums)] <- which(names(df)==columns[is.na(nums)])
              return(names(df)[nums])
            }

col2name(df, columns)
[1] "a" "i" "h" "a"

How can I fix this?

M--
  • 25,431
  • 8
  • 61
  • 93
Tom
  • 2,173
  • 1
  • 17
  • 44

2 Answers

2

We just need to loop over the columns:

#' Resolve a mixed column specification to column positions.
#'
#' @param df An object with column names available through `names(df)`.
#' @param columns A vector mixing column positions and column names. Because
#'   `c(1, "a")` coerces everything to character, positions may arrive as
#'   strings such as `"1"`.
#' @return A numeric vector of column positions, in the order of `columns`.
#'   Signals an error if a requested name is not a column of `df`.
col2num <- function(df, columns) {
  columns <- as.character(columns)
  # Name entries cannot be parsed as numbers; that coercion warning is
  # expected, so it is silenced here and callers do not need to wrap calls
  # in suppressWarnings().
  nums <- suppressWarnings(as.numeric(columns))
  is_name <- is.na(nums)
  # match() looks every name up individually (first match wins), so the
  # result does not depend on how the names line up with names(df).
  nums[is_name] <- match(columns[is_name], names(df))
  if (anyNA(nums)) {
    unknown <- unique(columns[is.na(nums)])
    stop(
      "Unknown column name(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  nums
}

#' Resolve a mixed column specification to column names.
#'
#' @param df An object with column names available through `names(df)`.
#' @param columns A vector mixing column positions and column names.
#' @return A character vector of column names, in the order of `columns`.
col2name <- function(df, columns) {
  names(df)[col2num(df, columns)]
}

# The two orderings from the question; c() coerces both to character vectors.
columns1 <- c(1:8, "i", 9, "j")
columns2 <- c(1:7, "j", 8, "i")

# Original ordering: "i" and 9 both refer to the ninth column.
suppressWarnings(col2name(df, columns1))
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "i" "j"

suppressWarnings(col2num(df, columns1))
#>  [1]  1  2  3  4  5  6  7  8  9  9 10


# Ordering that failed in the question: "j" is requested before "i".
suppressWarnings(col2num(df, columns2))
#>  [1]  1  2  3  4  5  6  7 10  8  9

suppressWarnings(col2name(df, columns2))
#>  [1] "a" "b" "c" "d" "e" "f" "g" "j" "h" "i"

I am using suppressWarnings to avoid getting the following warnings each time I run the functions:

Warning messages:
1: In col2name(df, columns) : NAs introduced by coercion
2: In lapply(X = X, FUN = FUN, ...) : NAs introduced by coercion
M--
  • 25,431
  • 8
  • 61
  • 93
1

An alternative with the disadvantage of necessitating that the data be a data.frame object:

#' Select columns of a data.frame by a mix of positions and names.
#'
#' @param df A data.frame; columns are selected with `df[positions]`.
#' @param cols A vector mixing column positions and column names, e.g.
#'   `c(1, "Sepal.Width")` (which `c()` coerces to character).
#' @return `df` restricted to the requested columns, in the order requested;
#'   a column requested twice appears twice. Signals an error if a requested
#'   name is not a column of `df`.
indexr <- function(df, cols) {
  cols <- as.character(cols)
  # Name entries cannot be parsed as numbers; that warning is expected.
  positions <- suppressWarnings(as.numeric(cols))
  # An entry is a name if it contains a letter or is not a number at all.
  # "[A-Za-z]" is used because the range "A-z" also spans "[", "\", "]",
  # "^", "_" and the backtick.
  is_name <- is.na(positions) | grepl("[A-Za-z]", cols)
  # Fill the names in place so the caller's order and repeats are kept;
  # gathering the numbers with setdiff() would move them to the front and
  # drop repeated entries.
  positions[is_name] <- match(cols[is_name], names(df))
  if (anyNA(positions)) {
    unknown <- unique(cols[is.na(positions)])
    stop(
      "Unknown column name(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  df[positions]
}



 # Select the first column by position and "Sepal.Width" by name.
 indexr(iris,c(1,"Sepal.Width"))
    Sepal.Length Sepal.Width
1            5.1         3.5
2            4.9         3.0
3            4.7         3.2

With your data (the drawback is that we have to go back to a data.frame; one might define a separate method to avoid that):

# indexr() expects a plain data.frame, so convert the question's data.table
# first, then select with the mixed position/name vector.
data.table::setDF(df)
indexr(df,columns)
  a b c d e f g h i i.1  j
1 1 2 3 4 5 6 7 8 9   9 10

Edit: To return names instead:

#' Resolve a mix of column positions and names to the selected column names.
#'
#' @param df A data.frame; columns are selected with `df[positions]`.
#' @param cols A vector mixing column positions and column names, e.g.
#'   `c("mpg", 5)` (which `c()` coerces to character).
#' @return The names of `df[positions]`, in the order requested; a column
#'   requested twice is reported with the data.frame's de-duplicated name
#'   (e.g. "i.1"). Signals an error if a requested name is not a column.
indexr <- function(df, cols) {
  cols <- as.character(cols)
  # Name entries cannot be parsed as numbers; that warning is expected.
  positions <- suppressWarnings(as.numeric(cols))
  # An entry is a name if it contains a letter or is not a number at all.
  # "[A-Za-z]" is used because the range "A-z" also spans "[", "\", "]",
  # "^", "_" and the backtick.
  is_name <- is.na(positions) | grepl("[A-Za-z]", cols)
  # Fill the names in place so the caller's order and repeats are kept;
  # gathering the numbers with setdiff() would move them to the front and
  # drop repeated entries.
  positions[is_name] <- match(cols[is_name], names(df))
  if (anyNA(positions)) {
    unknown <- unique(cols[is.na(positions)])
    stop(
      "Unknown column name(s): ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  names(df[positions])
}


# The result follows the order of the request: "mpg" first, then column 5.
indexr(mtcars, c("mpg", 5))
#> [1] "mpg"  "drat"


  # Same call on the question's table; the ninth column is requested twice,
  # so its second occurrence is reported as "i.1".
  indexr(df,columns)
 [1] "a"   "b"   "c"   "d"   "e"   "f"   "g"   "h"   "i"   "i.1"
[11] "j"  
NelsonGon
  • 13,015
  • 7
  • 27
  • 57