I have sample data that looks like this:
id <- c("1a","2c","3d","4f","5g","6e","7f","8q","9r","10v","11x","12l")
O <- c(1,1,0,1,1,0,0,1,0,1,0,1)
dg1 <- c("A02","A84","B12","C94","D37","D12","D68","E12","F48","H12","Z83","")
dg2 <- c("B18","N34","A02","M01","B12","J02","K52","","I10","","","B18")
df <- cbind.data.frame(id,O,dg1,dg2)
I am trying to get a data frame that looks like this so that I can do a univariate logistic regression on O against each variable.
A02 <- c(1,0,1,0,0,0,0,0,0,0,0,0)
A84 <- c(0,1,0,0,0,0,0,0,0,0,0,0)
B12 <- c(0,0,1,0,1,0,0,0,0,0,0,0)
B18 <- c(1,0,0,0,0,0,0,0,0,0,0,1)
C94 <- c(0,0,0,1,0,0,0,0,0,0,0,0)
D12 <- c(0,0,0,0,0,1,0,0,0,0,0,0)
D37 <- c(0,0,0,0,1,0,0,0,0,0,0,0)
D68 <- c(0,0,0,0,0,0,1,0,0,0,0,0)
E12 <- c(0,0,0,0,0,0,0,1,0,0,0,0)
F48 <- c(0,0,0,0,0,0,0,0,1,0,0,0)
H12 <- c(0,0,0,0,0,0,0,0,0,1,0,0)
I10 <- c(0,0,0,0,0,0,0,0,1,0,0,0)
J02 <- c(0,0,0,0,0,1,0,0,0,0,0,0)
K52 <- c(0,0,0,0,0,0,1,0,0,0,0,0)
M01 <- c(0,0,0,1,0,0,0,0,0,0,0,0)
N34 <- c(0,1,0,0,0,0,0,0,0,0,0,0)
Z83 <- c(0,0,0,0,0,0,0,0,0,0,1,0)
df <- cbind.data.frame(df,A02,A84,B12,B18,C94,D12,D37,D68,E12,F48,H12,I10,J02,K52,M01,N34,Z83)
I've attempted to follow the code here and here but ran into issues that I wasn't sure how to fix. Can anyone point out my mistake/misunderstanding? I would prefer to have a solution in dplyr or base, but really willing to try anything.
Attempts:
dumbo <- model.matrix(id ~ dg1+dg2,df)
library(recipes)
dumber <- df %>% recipe(id ~ .) %>%
step_dummy(dg1:dg2,
one_hot = TRUE) %>%
prep() %>% bake(new_data=NULL)