0
# Desired result, written out by hand: one 0/1 indicator column per category.
# The suffix is zero-based, i.e. column "<var>_<k-1>" flags rows whose value
# equals k. ifelse() yields NA wherever the source value is NA.
respect$BB6_cat5_0 <- ifelse(respect$BB6_cat5 == 1, 1, 0)
respect$BB6_cat5_1 <- ifelse(respect$BB6_cat5 == 2, 1, 0)
respect$BB6_cat5_2 <- ifelse(respect$BB6_cat5 == 3, 1, 0)
respect$BB6_cat5_3 <- ifelse(respect$BB6_cat5 == 4, 1, 0)
respect$BB6_cat5_4 <- ifelse(respect$BB6_cat5 == 5, 1, 0)

# Same pattern for a variable with only two categories.
respect$J1A_cat2_0 <- ifelse(respect$J1A_cat2 == 1, 1, 0)
respect$J1A_cat2_1 <- ifelse(respect$J1A_cat2 == 2, 1, 0)

The category lengths vary for the variables and the naming of the dummy variables is very important for my project.

I have tried the following but this leaves out the referent variable due to how model.matrix works.

# First attempt: expand every column of `data` into indicator columns using
# model.matrix().
#
# data: a data frame; every column is treated as categorical.
# Returns (invisibly) a data frame of 0/1 columns named "<column>_<level>".
# Known limitation: with the default contrasts only the first variable keeps
# all of its levels; every later variable loses its reference level.
dummy.fun <- function(data) {
  as_factors <- data.frame(lapply(data, as.factor))
  # The trailing underscore separates the variable name from the level label
  # in the column names that model.matrix() generates.
  names(as_factors) <- paste0(names(as_factors), "_")
  # "+ 0" removes the intercept column from the design matrix.
  design <- model.matrix(~ . + 0, data = as_factors)
  invisible(data.frame(design))
}

dummy.fun(respect)

However, this does not give me the BB6_cat5_0 unless it is the first variable in the dataset. Any ideas as to how I can get this function to output all dummy variables even the referents?

Rob Tala
  • 13
  • 3
  • 1
    Please provide a minimal working example, i.e. data, code and expected output. – emilliman5 Jan 11 '18 at 15:39
  • I don't think that code would _ever_ give you BB6_cat5_0 since there is no argument after the "_", and R indexing starts with 1, not 0. – IRTFM Jan 11 '18 at 15:56
  • Looks like you want exactly what `model.matrix` gives but with different names. Just modify the names instead of writing all those `ifelse` statements. If you post sample input, we can help. – Gregor Thomas Jan 11 '18 at 16:19

3 Answers3

0

I'm sure there's a nicer way of doing this (a double for loop is not a pleasant sight) but the following should do what you want:

respect <- list(BB6_cat5 = 1, BB6_cat0 = 2, BB6_cat1 = 3)

# Snapshot the original names before the loop: the loop adds new entries to
# `respect`, and those must not be expanded again.
respect_names <- names(respect)

# Iterating over the names directly (rather than 1:length(...)) also behaves
# correctly when there are no variables at all.
for (var_name in respect_names) {
  # Suffix i flags the category coded i + 1. The range 0:4 assumes five
  # categories for every variable; adjust it for variables with a different
  # number of categories.
  for (i in 0:4) {
    respect[[paste0(var_name, "_", i)]] <-
      ifelse(respect[[var_name]] == (i + 1), 1, 0)
  }
}

respect

The first level loops through the original named items in respect. The second level loops through the values you wish to test. It uses the [[]] syntax to add values to the list and to keep the test consistent.

AodhanOL
  • 630
  • 7
  • 26
0

Try with this function:

# Sample data (seeded so the example is reproducible):
set.seed(123)
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
df <- data.frame(
  a = sample(letters[1:3], 10, replace = TRUE),
  b = sample(1:2, 10, replace = TRUE)
)

# Function:
# Append one 0/1 indicator column per distinct value of each selected column.
#
# df:   a data frame.
# vars: character vector of column names in `df` to expand; an error is
#       raised if any of them is missing.
# Returns `df` with the extra columns "<var>_<value>" appended, one per
# distinct non-NA value, in sorted order. Rows whose source value is NA get
# NA in every indicator of that variable.
vars_to_dummy <- function(df, vars) {
  stopifnot(all(vars %in% names(df)))
  for (i in vars) {
    # sort() drops NA, so NA never becomes a category of its own.
    dummy_names <- sort(unique(df[[i]]))
    if (length(dummy_names) == 0L) {
      # Nothing to expand (no rows, or the column is entirely NA).
      next
    }
    # Build a list of columns instead of relying on sapply(): sapply()
    # collapses to a plain vector when `df` has a single row, which gives
    # the result the wrong shape.
    dummy_i <- lapply(dummy_names, function(x) {
      (df[[i]] == x) * 1
    })
    names(dummy_i) <- paste(i, dummy_names, sep = "_")
    # optional = TRUE keeps the generated names exactly as built above.
    df <- cbind(df, as.data.frame(dummy_i, optional = TRUE))
  }
  df
}
# Try that:
vars_to_dummy(df, vars = c("a", "b"))

> vars_to_dummy(df, vars = c("a", "b"))
   a b a_a a_b a_c b_1 b_2
1  a 2   1   0   0   0   1
2  c 1   0   0   1   1   0
3  b 2   0   1   0   0   1
4  c 2   0   0   1   0   1
5  c 1   0   0   1   1   0
6  a 2   1   0   0   0   1
7  b 1   0   1   0   1   0
8  c 1   0   0   1   1   0
9  b 1   0   1   0   1   0
10 b 2   0   1   0   0   1

Edit:

df is your data frame, vars is a character vector denoting column names you want dummies for.

Tino
  • 2,091
  • 13
  • 15
0
# Expand every column of `data` into a full set of indicator columns,
# including the reference level of every variable.
#
# data: a data frame; every column is treated as categorical.
# Returns a data frame of 0/1 columns named "<column>_<level>".
dummy.fun <- function(data) {
  data_factor <- data.frame(lapply(data, as.factor))
  # The trailing underscore separates the variable name from the level label
  # in the column names that model.matrix() generates.
  names(data_factor) <- paste0(names(data_factor), "_")
  # contrasts = FALSE yields an identity (one column per level) coding for
  # each factor, so model.matrix() does not drop a reference level.
  full_contrasts <- lapply(data_factor, contrasts, contrasts = FALSE)
  # "+ 0" removes the intercept column from the design matrix.
  data.frame(
    model.matrix(~ . + 0, data = data_factor, contrasts.arg = full_contrasts)
  )
}
Rob Tala
  • 13
  • 3