0

I do not want to "normalize" my data into [0,1]. I just want to recode every value in my data set: values in [1,3] become 1, and values greater than 3 become 0. This is also not "creating a dummy variable"; I just want to transform the values of the whole data set, like an "encode" operation.

        v1    v2   v3  v4
obs1    1.3   4    2.2  5
obs2    2.0   3.4  1.7  8
obs3    2.3   3.6  2.1  2.5

My desired output is:
       1 0 1 0
       1 0 1 0
       1 0 1 1     

My real data set is a little bit "noisy":

> dput(head(data1))

structure(list(V1 = structure(1:6, .Label = c("egl19_f1", "egl19_f2", 
"egl19_f3", "egl19_nf1", "egl19_nf2", "egl19h20_nf1", "egl19h20_nf2", 
"N2_f1", "N2_f2", "N2_f3", "N2_f4", "N2_nf1", "N2_nf10", "N2_nf11", 
"N2_nf12", "N2_nf13", "N2_nf14", "N2_nf15", "N2_nf16", "N2_nf17", 
"N2_nf18", "N2_nf19", "N2_nf2", "N2_nf20", "N2_nf21", "N2_nf22", 
"N2_nf23", "N2_nf24", "N2_nf25", "N2_nf26", "N2_nf27", "N2_nf28", 
"N2_nf29", "N2_nf3", "N2_nf30", "N2_nf31", "N2_nf32", "N2_nf33", 
"N2_nf4", "N2_nf5", "N2_nf6", "N2_nf7", "N2_nf8", "N2_nf9", "N2_nnf1", 
"N2_nnf2", "tph1_f1", "tph1_f10", "tph1_f11", "tph1_f12", "tph1_f13", 
"tph1_f2", "tph1_f3", "tph1_f4", "tph1_f5", "tph1_f6", "tph1_f7", 
"tph1_f8", "tph1_f9", "tph1_nf1", "tph1_nf2", "tph1_nf3", "tph1_nf4", 
"tph1_nf5"), class = "factor"), V2 = c(1.597846, 1.766222, 2.616263, 
2.11194, 2.25267, 4.984707), V3 = c(2.116104, 1.498594, 1.878867, 
2.58981, 2.328275, 4.861571), V4 = c(2.014185, 1.312045, 5.738319, 
2.404355, 2.104623, 1.731892), V5 = c(2.162238, 3.857461, 2.011785, 
3.093034, 1.822684, 1.652817), V6 = c(1.612883, 2.290582, 3.492973, 
3.751587, 3.131442, NA), V7 = c(5.094708, 7.526972, 2.016519, 
4.005168, 6.266833, NA), V8 = c(1.785222, 2.124262, 2.026904, 
4.459859, 1.350723, NA), V9 = c(NA, 1.405944, 3.482505, 5.093975, 
NA, NA), V10 = c(NA, 1.540232, 4.095237, 4.179566, NA, NA), V11 = c(NA, 
1.499319, 2.371864, 5.480289, NA, NA), V12 = c(NA, 1.478772, 
1.53436, 4.809065, NA, NA), V13 = c(NA, 2.569976, 1.61257, 3.841687, 
NA, NA), V14 = c(NA, 3.325919, 2.113012, 2.66648, NA, NA), V15 = c(NA, 
7.657997, NA, 11.97038, NA, NA), V16 = c(NA, 2.213487, NA, NA, 
NA, NA), V17 = c(NA, 2.224062, NA, NA, NA, NA), V18 = c(NA, 2.466867, 
NA, NA, NA, NA), V19 = c(NA, 4.105319, NA, NA, NA, NA), V20 = c(NA, 
1.447206, NA, NA, NA, NA), V21 = c(NA, 1.531235, NA, NA, NA, 
NA), V22 = c(NA, 1.482622, NA, NA, NA, NA), V23 = c(NA, 1.609606, 
NA, NA, NA, NA), V24 = c(NA, 1.490796, NA, NA, NA, NA), V25 = c(NA, 
1.416841, NA, NA, NA, NA), V26 = c(NA, 3.016755, NA, NA, NA, 
NA), V27 = c(NA, 1.533071, NA, NA, NA, NA), V28 = c(NA, 1.615619, 
NA, NA, NA, NA), V29 = c(NA, 1.579121, NA, NA, NA, NA), V30 = c(NA, 
1.552443, NA, NA, NA, NA), V31 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V32 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V33 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V34 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V35 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V36 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V37 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), V38 = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_)), .Names = c("V1", "V2", "V3", 
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", 
"V14", "V15", "V16", "V17", "V18", "V19", "V20", "V21", "V22", 
"V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31", 
"V32", "V33", "V34", "V35", "V36", "V37", "V38"), row.names = c(NA, 
6L), class = "data.frame")

Here is my code:

  # Attempt to recode every numeric cell of data1 in place: 1 if the value
  # lies strictly between 1 and 3, 0 otherwise. Missing cells are meant to be
  # left untouched.
  #
  # NOTE(review): the result of na.omit() is not assigned, so data1 itself is
  # unchanged by this call.
  na.omit(data1)
# Walk every row; columns start at 2, so the first column is skipped
# (in the dput above, V1 is a factor of labels rather than a measurement).
for(i in 1:nrow(data1)){
  for(j in 2:ncol(data1)){
    # NOTE(review): this guard reads `data`, not `data1` -- presumably a typo.
    # With no user object named `data`, the name resolves to the function
    # utils::data, and indexing a function raises
    # "object of type 'closure' is not subsettable".
    if((!is.na(data[i,j]) && data[i,j]!= '')){
     # Both bounds are strict, so values exactly equal to 1 or 3 fall into the
     # else branch and become 0.
     # NOTE(review): the stated goal is the closed interval [1,3] -- confirm
     # whether <= was intended.
     if(1<data1[i,j]&&data1[i,j]<3){
      data1[i,j]=1
          }
     else{
      data1[i,j]=0
    }
    }
  }
}

This is my code, but it still does not work. It fails with: Error in data[i, j] : object of type 'closure' is not subsettable

Ian
  • 31
  • 1
  • 7

1 Answer

1

The easiest solution is probably to use the data.frame method of the `<=` operator to overwrite your values with binary ones. Either

# Recode every column except the first: `df[-1] <= 3` yields a logical matrix,
# and the unary `+` coerces TRUE/FALSE to 1/0. NA cells stay NA.
# NOTE(review): only the upper bound is tested, so values below 1 would also
# become 1. `df` presumably stands for the asker's data frame (data1).
df[-1] <- +(df[-1] <= 3)

Or less golfed

# Same recoding of all columns but the first, with the logical-to-integer
# coercion spelled out by adding the integer 0L. NA cells stay NA.
# NOTE(review): only the upper bound is tested, so values below 1 would also
# become 1.
df[-1] <- (df[-1] <= 3) + 0L
David Arenburg
  • 91,361
  • 17
  • 137
  • 196
  • Would you mind having a look at my other question? I'm really stuck there :( Thank you – Ian Nov 28 '15 at 21:31