Another option would be to use ==
.
. <- unique(df$x)
cbind(df, +do.call(cbind, lapply(setNames(., .), `==`, df$x)))
# n x C I L T Y
#1 1 I 0 1 0 0 0
#2 2 C 1 0 0 0 0
#3 3 C 1 0 0 0 0
#4 4 Y 0 0 0 0 1
#5 5 L 0 0 1 0 0
#6 6 T 0 0 0 1 0
#...
Or in one line using sapply
.
cbind(df, +sapply(unique(df$x), `==`, df$x))
Or use contrasts
and match
them to df$x
.
. <- contrasts(as.factor(df$x), FALSE)
#. <- contrasts(as.factor(unique(df$x)), FALSE) #Alternative
cbind(df, .[match(df$x, rownames(.)),])
#cbind(df, .[fastmatch::fmatch(df$x, rownames(.)),]) #Alternative
Or indexing in a matrix
.
. <- unique(df$x) #Could be sorted
#. <- collapse::funique(df$x) #Alternative
#. <- kit::funique(df$x) #Alternative
i <- match(df$x, .)
#i <- fastmatch::fmatch(df$x, .) #Alternative
#i <- data.table::chmatch(df$x, .) #Alternative
nc <- length(.)
nr <- length(i)
cbind(df, matrix(`[<-`(integer(nc * nr), 1:nr + nr * (i - 1), 1), nr, nc,
dimnames=list(NULL, .)))
Or using outer
.
. <- unique(df$x)
cbind(df, +outer(df$x, setNames(., .), `==`))
Or using rep
and m̀atrix`.
. <- unique(df$x)
n <- nrow(df)
cbind(df, +matrix(df$x == rep(., each=n), n, dimnames=list(NULL, .)))
Benchmark of some methods which will work for more codes in variable x and not only for e.g. LETTERS
.
set.seed(42)
df <- data.frame(n = seq(1:1000000), x = sample(LETTERS, 1000000, replace = T))
library(nnet)
library(dplyr)
microbenchmark::microbenchmark(times = 10L, setup = gc(FALSE), control=list(order="block")
, "nnet" = df %>% cbind(class.ind(.$x) == 1) %>%
mutate(across(-c(n, x), ~.*1))
, "contrasts" = {. <- contrasts(as.factor(df$x), FALSE)
cbind(df, .[match(df$x, rownames(.)),])}
, "==" = {. <- unique(df$x)
cbind(df, +do.call(cbind, lapply(setNames(., .), `==`, df$x)))}
, "==Sapply" = cbind(df, +sapply(unique(df$x), `==`, df$x))
, "matrix" = {. <- unique(df$x)
i <- match(df$x, .)
nc <- length(.)
nr <- length(i)
cbind(df, matrix(`[<-`(integer(nc * nr), 1:nr + nr * (i - 1), 1), nr, nc,
dimnames=list(NULL, .)))}
, "outer" = {. <- unique(df$x)
cbind(df, +outer(df$x, setNames(., .), `==`))}
, "rep" = {. <- unique(df$x)
n <- nrow(df)
cbind(df, +matrix(df$x == rep(., each=n), n, dimnames=list(NULL, .)))}
)
Result
Unit: milliseconds
expr min lq mean median uq max neval
nnet 208.6898 220.2304 326.2210 305.5752 386.3385 541.0621 10
contrasts 1110.0123 1168.7651 1263.5357 1216.1403 1357.0532 1514.4411 10
== 146.2217 156.8141 208.2733 185.1860 275.3909 278.8497 10
==Sapply 290.0458 291.4543 301.3010 295.0557 298.0274 358.0531 10
matrix 302.9993 304.8305 312.9748 306.8981 310.0781 363.0773 10
outer 524.5230 583.5224 603.3300 586.3054 595.4086 807.0260 10
rep 276.2110 285.3983 389.8187 434.2754 435.8607 442.3403 10