I want to bin the numeric variables in a data frame. Please have a look at my example code:
# Build a small example data frame: x is the integers -10..10, y is x squared.
x <- -10:10
y <- x^2
parab <- data.frame(x, y)
# Show the column types (both columns are numeric at this point).
str(parab)
## 'data.frame': 21 obs. of 2 variables:
## $ x: int -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
## $ y: num 100 81 64 49 36 25 16 9 4 1 ...
# Bin a single numeric column into 3 intervals; the result is a factor
# (see the "Levels:" line in the output below).
cut(parab$x, 3) #works as expected
## [1] (-10,-3.33] (-10,-3.33] (-10,-3.33] (-10,-3.33] (-10,-3.33]
## [6] (-10,-3.33] (-10,-3.33] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33]
## [11] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (-3.33,3.33] (3.33,10]
## [16] (3.33,10] (3.33,10] (3.33,10] (3.33,10] (3.33,10]
## [21] (3.33,10]
## Levels: (-10,-3.33] (-3.33,3.33] (3.33,10]
# Apply cut() to every column. apply() works on a matrix, so the data frame is
# converted first; both columns are numeric here, so the matrix is numeric.
# The factor results come back as a character matrix of interval labels.
apply(parab, 2, function(x) cut(x, 3)) #works as expected
## x y
## [1,] "(-10,-3.33]" "(66.7,100]"
## [2,] "(-10,-3.33]" "(66.7,100]"
## [3,] "(-10,-3.33]" "(33.3,66.7]"
## [4,] "(-10,-3.33]" "(33.3,66.7]"
## [5,] "(-10,-3.33]" "(33.3,66.7]"
## [6,] "(-10,-3.33]" "(-0.1,33.3]"
## [7,] "(-10,-3.33]" "(-0.1,33.3]"
## [8,] "(-3.33,3.33]" "(-0.1,33.3]"
## [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]" "(-0.1,33.3]"
## [16,] "(3.33,10]" "(-0.1,33.3]"
## [17,] "(3.33,10]" "(33.3,66.7]"
## [18,] "(3.33,10]" "(33.3,66.7]"
## [19,] "(3.33,10]" "(33.3,66.7]"
## [20,] "(3.33,10]" "(66.7,100]"
## [21,] "(3.33,10]" "(66.7,100]"
# Same as above, but bin a column only when it is numeric. Every column
# passes is.numeric() here because the matrix handed to the function is numeric.
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #works as expected
## x y
## [1,] "(-10,-3.33]" "(66.7,100]"
## [2,] "(-10,-3.33]" "(66.7,100]"
## [3,] "(-10,-3.33]" "(33.3,66.7]"
## [4,] "(-10,-3.33]" "(33.3,66.7]"
## [5,] "(-10,-3.33]" "(33.3,66.7]"
## [6,] "(-10,-3.33]" "(-0.1,33.3]"
## [7,] "(-10,-3.33]" "(-0.1,33.3]"
## [8,] "(-3.33,3.33]" "(-0.1,33.3]"
## [9,] "(-3.33,3.33]" "(-0.1,33.3]"
## [10,] "(-3.33,3.33]" "(-0.1,33.3]"
## [11,] "(-3.33,3.33]" "(-0.1,33.3]"
## [12,] "(-3.33,3.33]" "(-0.1,33.3]"
## [13,] "(-3.33,3.33]" "(-0.1,33.3]"
## [14,] "(-3.33,3.33]" "(-0.1,33.3]"
## [15,] "(3.33,10]" "(-0.1,33.3]"
## [16,] "(3.33,10]" "(-0.1,33.3]"
## [17,] "(3.33,10]" "(33.3,66.7]"
## [18,] "(3.33,10]" "(33.3,66.7]"
## [19,] "(3.33,10]" "(33.3,66.7]"
## [20,] "(3.33,10]" "(66.7,100]"
## [21,] "(3.33,10]" "(66.7,100]"
# ifelse() returns a result shaped like its test argument. The test here is the
# single value T, so only the first element of cut(x, 3) is kept, and it comes
# back as the bare integer level code (1 for x, 3 for y) rather than a factor.
# A plain if/else is the right tool for a single (scalar) condition.
apply(parab, 2, function(x) ifelse(T, cut(x, 3), T)) #does not work!
## x y
## 1 3
# Add a character column so the data frame now has mixed column types.
parab$z <- rep("test", length(x))
# Confirm the new column types: int, num, chr.
str(parab)
## 'data.frame': 21 obs. of 3 variables:
## $ x: int -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ...
## $ y: num 100 81 64 49 36 25 16 9 4 1 ...
## $ z: chr "test" "test" "test" "test" ...
# apply() converts the data frame to a matrix first. With the character column
# z present, the whole matrix becomes character, so is.numeric(x) is FALSE for
# every column and each one is returned unchanged (as strings).
# Iterating over the columns themselves, e.g. with lapply(parab, ...), keeps
# each column's own type.
apply(parab, 2, function(x) if(is.numeric(x)) cut(x, 3) else x) #does not work anymore?!?
## x y z
## [1,] "-10" "100" "test"
## [2,] " -9" " 81" "test"
## [3,] " -8" " 64" "test"
## [4,] " -7" " 49" "test"
## [5,] " -6" " 36" "test"
## [6,] " -5" " 25" "test"
## [7,] " -4" " 16" "test"
## [8,] " -3" " 9" "test"
## [9,] " -2" " 4" "test"
## [10,] " -1" " 1" "test"
## [11,] " 0" " 0" "test"
## [12,] " 1" " 1" "test"
## [13,] " 2" " 4" "test"
## [14,] " 3" " 9" "test"
## [15,] " 4" " 16" "test"
## [16,] " 5" " 25" "test"
## [17,] " 6" " 36" "test"
## [18,] " 7" " 49" "test"
## [19,] " 8" " 64" "test"
## [20,] " 9" " 81" "test"
## [21,] " 10" "100" "test"
My questions
- Why do you have to use `if` and `else` instead of `ifelse`? (I think it has to do with `ifelse` being vectorized?) ...and more importantly:
- Why does the `cut` function stop working when another column is not numeric? How can I remedy the situation to get it functional again?