0

I want to compute feature importance, so I used the caret package on my data set:

    > dput(head(data,100))
structure(list(Store = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Sales = c(8314, 
8977, 7610, 8864, 8107, 0, 3878, 5080, 5702, 5414, 5600, 5798, 
0, 4258, 6679, 7909, 8663, 8625, 9462, 0, 4011, 6316, 6613, 6347, 
6180, 6736, 0, 4482, 8149, 7450, 8311, 13261, 10243, 0, 5156, 
6222, 5763, 5448, 6757, 5840, 0, 3891, 8074, 7537, 8524, 9780, 
10818, 0, 4687, 6181, 5656, 6440, 7127, 6567, 0, 4622, 9685, 
0, 9590, 10455, 11594, 0, 4542, 7421, 6475, 5589, 6443, 0, 0, 
4613, 7349, 7682, 8226, 8728, 10040, 0, 4149, 7515, 0, 7003, 
6043, 5797, 0, 6024, 9083, 7705, 9046, 8719, 9488, 0, 6564, 0, 
9971, 9484, 8758, 9447, 0, 4373, 5376, 5570), DayOfWeek = c(5, 
4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 
4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 
4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 
4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 
4, 3, 2, 1, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4), Open = c(1, 1, 
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 
1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 
1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 
1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1), Promo = c(1, 1, 1, 
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0), StateHoliday = c(4, 3, 
2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 
1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 
4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 
3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 
2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1), SchoolHoliday = structure(c(2L, 
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("0", "1"), class = "factor"), DateYear = c(2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 
2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015
), DateMonth = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 
4), DateDay = c(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 
19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 
2, 1, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 31, 30, 
29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 
13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 30, 29, 28, 27, 26, 
25, 24, 23), DateWeek = c(30, 30, 30, 30, 30, 29, 29, 29, 29, 
29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 
27, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 24, 
24, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 
22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 
20, 20, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 
17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16), StoreType = c(3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), CompetitionOpen = c(103, 
103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 
103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 103, 
103, 103, 103, 103, 102, 102, 102, 102, 102, 102, 102, 102, 102, 
102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 
102, 102, 102, 102, 102, 102, 102, 102, 101, 101, 101, 101, 101, 
101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 
101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 
100, 100, 100, 100, 100, 100, 100, 100), PromoOpen = c(52, 52, 
52, 52, 52, 51.75, 51.75, 51.75, 51.75, 51.75, 51.75, 51.75, 
51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.25, 51.25, 51.25, 
51.25, 51.25, 51.25, 51.25, 51, 51, 51, 51, 51, 51, 51, 50.75, 
50.75, 50.75, 50.75, 50.75, 50.75, 50.75, 50.5, 50.5, 50.5, 50.5, 
50.5, 50.5, 50.5, 50.25, 50.25, 50.25, 50.25, 50.25, 50.25, 50.25, 
50, 50, 50, 50, 50, 50, 50, 49.75, 49.75, 49.75, 49.75, 49.75, 
49.75, 49.75, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.5, 49.25, 
49.25, 49.25, 49.25, 49.25, 49.25, 49.25, 49, 49, 49, 49, 49, 
49, 49, 48.75, 48.75, 48.75, 48.75, 48.75, 48.75, 48.75, 48.5, 
48.5, 48.5, 48.5), IspromoinSales = c(1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("Store", "Sales", "DayOfWeek", 
"Open", "Promo", "StateHoliday", "SchoolHoliday", "DateYear", 
"DateMonth", "DateDay", "DateWeek", "StoreType", "CompetitionOpen", 
"PromoOpen", "IspromoinSales"), sorted = "Store", class = c("data.table", 
"data.frame"), row.names = c(NA, -100L), .internal.selfref = <pointer: 0x000000000c610788>)

Then I wrote this code to train the model:

> library(caret)
> set.seed(42)
> model_rf <- caret::train(Sales ~ .,
+                          data = data[,-1],
+                          method = "rf",
+                          trControl = trainControl(method = "repeatedcv", 
+                                                   number = 10, 
+                                                   repeats = 5, 
+                                                   verboseIter = FALSE))

But this is what I get:

Something is wrong; all the RMSE metric values are missing: RMSE Rsquared MAE
Min. : NA Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA Median : NA
Mean :NaN Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA Max. : NA
NA's :3 NA's :3 NA's :3
Error: Stopping

In addition: There were 50 or more warnings (use warnings() to see the first 50)

How can I fix this error and get a correct result?

Thank you!

user8810618
  • 115
  • 11
  • 1
    read the warnings, search SO for "something is wrong", there are a multitude of SO questions about this. If you still cannot find anything, then we need a [reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) – phiver Mar 04 '18 at 13:17
  • @phiver, I tried converting all variables to numeric, as suggested here (https://stackoverflow.com/questions/31685757/caret-train-method-complains-something-is-wrong-all-the-rmse-metric-values-are), but the error still exists. You can see the edited question. – user8810618 Mar 04 '18 at 14:01
  • Did you read the `warnings()`? Also don't copy in your data like this. Use `dput(head(data, x))` if you want the first x records. – phiver Mar 04 '18 at 14:16
  • @phiver, look at the error : `> warnings() Messages d'avis : 1: model fit failed for Fold01.Rep1: mtry= 2 Error : cannot allocate vector of size 932.4 Mb 2: model fit failed for Fold01.Rep1: mtry= 7 Error : cannot allocate vector of size 932.4 Mb` – user8810618 Mar 04 '18 at 15:58
  • Well, there is your problem. Too much data for your memory. Try first to see if it works in randomForest before you start with caret. And if you have a lot of data, try ranger or rborist first. They are faster than randomForest. – phiver Mar 04 '18 at 16:21
  • @phiver, thank you very much! This gives me what I need, even though it takes **3 minutes** to run. Thank you again! – user8810618 Mar 04 '18 at 16:44

0 Answers0