I do not want "normalize" my data into [0,1]
. I just want to change my whole data set values who are between [1,3]
to 1
, values who are greater than 3
to 0
. Also, it is not "creating a dummy variable". I just want to transform my whole data set value. Just like "Encode".
v1 v2 v3 v4
obs1 1.3 4 2.2 5
obs2 2.0 3.4 1.7 8
obs3 2.3 3.6 2.1 2.5
My desired output is:
1 0 1 0
1 0 1 0
1 0 1 1
My real data set is a little bit "noisy":
> dput(head(data1))
structure(list(V1 = structure(1:6, .Label = c("egl19_f1", "egl19_f2",
"egl19_f3", "egl19_nf1", "egl19_nf2", "egl19h20_nf1", "egl19h20_nf2",
"N2_f1", "N2_f2", "N2_f3", "N2_f4", "N2_nf1", "N2_nf10", "N2_nf11",
"N2_nf12", "N2_nf13", "N2_nf14", "N2_nf15", "N2_nf16", "N2_nf17",
"N2_nf18", "N2_nf19", "N2_nf2", "N2_nf20", "N2_nf21", "N2_nf22",
"N2_nf23", "N2_nf24", "N2_nf25", "N2_nf26", "N2_nf27", "N2_nf28",
"N2_nf29", "N2_nf3", "N2_nf30", "N2_nf31", "N2_nf32", "N2_nf33",
"N2_nf4", "N2_nf5", "N2_nf6", "N2_nf7", "N2_nf8", "N2_nf9", "N2_nnf1",
"N2_nnf2", "tph1_f1", "tph1_f10", "tph1_f11", "tph1_f12", "tph1_f13",
"tph1_f2", "tph1_f3", "tph1_f4", "tph1_f5", "tph1_f6", "tph1_f7",
"tph1_f8", "tph1_f9", "tph1_nf1", "tph1_nf2", "tph1_nf3", "tph1_nf4",
"tph1_nf5"), class = "factor"), V2 = c(1.597846, 1.766222, 2.616263,
2.11194, 2.25267, 4.984707), V3 = c(2.116104, 1.498594, 1.878867,
2.58981, 2.328275, 4.861571), V4 = c(2.014185, 1.312045, 5.738319,
2.404355, 2.104623, 1.731892), V5 = c(2.162238, 3.857461, 2.011785,
3.093034, 1.822684, 1.652817), V6 = c(1.612883, 2.290582, 3.492973,
3.751587, 3.131442, NA), V7 = c(5.094708, 7.526972, 2.016519,
4.005168, 6.266833, NA), V8 = c(1.785222, 2.124262, 2.026904,
4.459859, 1.350723, NA), V9 = c(NA, 1.405944, 3.482505, 5.093975,
NA, NA), V10 = c(NA, 1.540232, 4.095237, 4.179566, NA, NA), V11 = c(NA,
1.499319, 2.371864, 5.480289, NA, NA), V12 = c(NA, 1.478772,
1.53436, 4.809065, NA, NA), V13 = c(NA, 2.569976, 1.61257, 3.841687,
NA, NA), V14 = c(NA, 3.325919, 2.113012, 2.66648, NA, NA), V15 = c(NA,
7.657997, NA, 11.97038, NA, NA), V16 = c(NA, 2.213487, NA, NA,
NA, NA), V17 = c(NA, 2.224062, NA, NA, NA, NA), V18 = c(NA, 2.466867,
NA, NA, NA, NA), V19 = c(NA, 4.105319, NA, NA, NA, NA), V20 = c(NA,
1.447206, NA, NA, NA, NA), V21 = c(NA, 1.531235, NA, NA, NA,
NA), V22 = c(NA, 1.482622, NA, NA, NA, NA), V23 = c(NA, 1.609606,
NA, NA, NA, NA), V24 = c(NA, 1.490796, NA, NA, NA, NA), V25 = c(NA,
1.416841, NA, NA, NA, NA), V26 = c(NA, 3.016755, NA, NA, NA,
NA), V27 = c(NA, 1.533071, NA, NA, NA, NA), V28 = c(NA, 1.615619,
NA, NA, NA, NA), V29 = c(NA, 1.579121, NA, NA, NA, NA), V30 = c(NA,
1.552443, NA, NA, NA, NA), V31 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V32 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V33 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V34 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V35 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V36 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V37 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), V38 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_)), .Names = c("V1", "V2", "V3",
"V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13",
"V14", "V15", "V16", "V17", "V18", "V19", "V20", "V21", "V22",
"V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31",
"V32", "V33", "V34", "V35", "V36", "V37", "V38"), row.names = c(NA,
6L), class = "data.frame")
Here is my code:
na.omit(data1)
for(i in 1:nrow(data1)){
for(j in 2:ncol(data1)){
if((!is.na(data[i,j]) && data[i,j]!= '')){
if(1<data1[i,j]&&data1[i,j]<3){
data1[i,j]=1
}
else{
data1[i,j]=0
}
}
}
}
This is my code, but still wrong. Error in data[i, j] : object of type 'closure' is not subsettable