0

I am trying to use ols_step_both_p with group_by function of dplyr package. I am using the following code

library(tidyverse)
library(olsrr)

#Do linear regression for all the locations
model = df %>% 
  mutate(Location = factor(Location, levels = unique(Location))) %>% 
  group_by(Location) %>%
  do(mod = lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12, data = ., 
              na.action=na.omit)) 

#Do stepwise regression for 1st location
ols_step_both_p(model$mod[[1]], pent = 0.05, prem = 0.1)

It returns me following error

Error in eval(model$call$data) : object '.' not found

How can I solve this error?

Data

df = structure(list(Location = c("Location 1", "Location 1", "Location 1", 
"Location 1", "Location 1", "Location 1", "Location 1", "Location 1", 
"Location 1", "Location 1", "Location 1", "Location 1", "Location 1", 
"Location 1", "Location 1", "Location 1", "Location 1", "Location 1", 
"Location 1", "Location 1", "Location 2", "Location 2", "Location 2", 
"Location 2", "Location 2", "Location 2", "Location 2", "Location 2", 
"Location 2", "Location 2", "Location 2", "Location 2", "Location 2", 
"Location 2", "Location 2", "Location 2", "Location 2", "Location 2", 
"Location 2", "Location 2"), y = c(1.65954284204268, 1.58123919015159, 
2.05973017000125, 2.18912673315495, 2.01224505269543, 1.99259958467057, 
2.00078847394452, 1.36897959183673, 1.52340971947847, 0.0261531145981931, 
0.154664774742817, 0.790430042398546, 1.28059309309309, 0.974354066985646, 
1.20366598778004, 1.39269070394898, 1.40758547008547, 1.61010461852513, 
1.62170385395538, 1.62471511775133, 2.38461538461538, 2.42220884742702, 
2.41693907875186, 2.81167789541226, 2.74944214217405, 2.21060782036392, 
2.55440414507772, 2.96888447533929, 2.63223300970874, 2.91519143680527, 
2.7768625982778, 3.56561085972851, 3.11382113821138, 2.9851919008764, 
3.31187669990934, 2.82333333333333, 3.63553943789665, 2.26956784527047, 
2.92354185554548, NA), x1 = c(0, 0, 271.72, 138.49, 0, 9.78, 
59.25, 0, 132.37, 29.14, 127.09, 26.35, 36.34, 22.58, 85.59, 
0, 0, 358.06, 0, 0, 3.33, 0, 0, 0, 5.62, 3.33, 3.23, 16.58, 85.6, 
72.73, 48.72, 29.21, 16.67, 63.12, 53.33, 40, 89.46, 76.35, 16.66, 
47.0397916666667), x2 = c(13.12, 0, 145.91, 131.29, 6.56, 141.93, 
174.52, 0, 104.95, 133.98, 182.68, 121.07, 128.49, 87.64, 99.25, 
0, 0, 124.63, 0, 3.33, 19.58, 32.58, 51.58, 27.4, 12.68, 29.91, 
72.37, 9.34, 22.82, 55.76, 25.9, 11.93, 25.9, 13.33, 29.46, 29.56, 
19.68, 46.24, 19.9, 39.13625), x3 = c(223.84, 59.36, 0, 3.33, 
81.72, 6.45, 13.01, 196.98, 0, 0, 0, 0, 3.33, 19.89, 3.23, 79.25, 
118.17, 3.23, 118.28, 121.5, 87.01, 70.46, 54.29, 70.97, 51.81, 
84.69, 66.75, 104.27, 29.21, 52.54, 34.51, 103.44, 139.87, 85.16, 
101.93, 82.36, 79.25, 49.14, 91.62, 43.7104166666667), x4 = c(44.04, 
31.43, 3.33, 0, 16.35, 0, 9.68, 108.28, 22.8, 9.9, 6.67, 6.67, 
0, 10, 0, 19.57, 26.01, 9.9, 32.58, 26.13, 58.46, 12.67, 29.16, 
73.39, 65.45, 68.92, 22.93, 107.04, 97.63, 94.2, 132.53, 112.47, 
95.03, 127.42, 107.53, 107.96, 71.72, 68.38, 123.87, 47.4670833333333
), x5 = c(34.12, 35.23, 39.8, 31, 33.8, 31.6, 33.96, 34.7, 33.2, 
32, 32.3, 32.7, 30.72, 33.44, 31.2, 33.03, 33.73, 32.5, 34.57, 
32.83, 33.5, 32.6, 33.3, 33.3, 32.9, 34.9, 32.4, 34.4, 34, 34.3, 
33.6, 35.7, 33.4, 34.5, 34.2, 35.2, 33.6, 34.6, 34.5, 32.7333333333333
), x6 = c(25.99, 25.55, 22.2, 25.6, 25.36, 24.8, 25.7, 26.4, 
22.3, 23.3, 22.8, 23.1, 25.16, 25.59, 24.82, 25.24, 25.93, 25.11, 
25.54, 25.54, 24.5, 24.4, 25.7, 25.7, 26.9, 24.1, 24.4, 24.4, 
25.4, 24, 23.5, 24.3, 24.1, 23.9, 25.1, 27.2, 25, 24.4, 24.5, 
23.628125), x7 = c(26.26, 26.7, 20.7, 21.1, 25.71, 21.6, 20, 
26.48, 22.9, 21.3, 21.2, 21.5, 22.3, 21.6, 21, 27.12, 27.23, 
22.6, 27.09, 26.29, 24.2, 25.2, 21.9, 22.6, 25.3, 23.4, 22.6, 
25.6, 25, 21.3, 19, 24.7, 21.1, 23.6, 22.7, 20, 20.4, 22.8, 24.2, 
24.10625), x8 = c(21.34, 21.95, 12, 16.9, 22.47, 19, 18, 22.86, 
15.2, 18.2, 17.2, 14.7, 17.7, 19.2, 16.3, 21.88, 21.55, 14.5, 
22.09, 22.1, 16.9, 18.6, 16.9, 19.2, 17.9, 19, 18.6, 17.4, 13.6, 
10, 13.3, 13.3, 14.2, 12, 11.9, 11.5, 10, 12.8, 13.7, 16.5791666666667
), x9 = c(5.46, 5.99, 8.2875, 7.2175, 6.18, 5.5925, 6.3025, 6.565, 
8.2725, 6.4175, 7.0825, 6.915, 6.89, 6.4475, 7.15, 5.945, 6.165, 
10.925, 6.0325, 5.52, 7.7925, 7.5525, 7.4025, 7.985, 8.505, 8.06, 
7.3125, 8.775, 9.615, 9.2025, 8.9075, 8.8675, 8.315, 9.1525, 
8.965, 8.9225, 9.475, 8.87, 8.6175, 8.30661458333333), x10 = c(0, 
0.25, 8.25, 6.25, 0, 10.75, 6, 0, 5.75, 4, 6.75, 4.25, 4.5, 2.5, 
5.5, 0, 0.25, 5.5, 0, 0, 7.75, 8.5, 6.75, 8.75, 10, 5.75, 10.25, 
9.75, 15, 10.25, 9.5, 10.75, 11.5, 12.25, 13.5, 10, 14.25, 13.25, 
11.25, 6.5), x11 = c(66.96, 77.19, 454, 60.5, 40.8, 61.6, 96.5, 
48.37, 100.7, 105.8, 48.5, 70.5, 118.2, 79, 56.6, 60.4, 80.1, 
119.07, 72.92, 213.39, 65.9, 54.2, 50.6, 98.1, 106.3, 77.8, 39.7, 
79.8, 33.2, 71, 49.3, 74, 97, 55.4, 84.8, 116, 51.7, 87.6, 35.2, 
51.4166666666667), x12 = c(227.07, 183.81, 570.7, 160.3, 145.07, 
136.2, 153.9, 149.17, 333.6, 224.4, 106.5, 138.05, 185.8, 176.64, 
227.9, 151.13, 261.67, 273.67, 196.28, 513.67, 156.6, 136.8, 
79.5, 200.6, 286.4, 157.6, 86.3, 164.6, 74.2, 120.7, 111.5, 104.5, 
189.1, 150, 139.2, 206, 82.2, 140.5, 137.6, 101.529166666667)), row.names = c(NA, 
40L), class = "data.frame")
UseR10085
  • 7,120
  • 3
  • 24
  • 54

1 Answers1

0

You need to change the data argument from . to df inside model. The code will look like this:

library(tidyverse)
library(olsrr)

#Do linear regression for all the locations
model = df %>% 
  mutate(Location = factor(Location, levels = unique(Location))) %>% 
  group_by(Location) %>%
  do(mod = lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12, data = df, 
              na.action=na.omit)) 

#Do stepwise regression for 1st location
ols_step_both_p(model$mod[[1]], pent = 0.05, prem = 0.1)

EDIT You can use the plyr package in R to achieve what you're looking for.

model <- dlply(df, "Location", function(df) 
  lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12, data = df, 
                                               na.action=na.omit))

It will give output like this:

$`Location 1`

Call:
lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + 
    x10 + x11 + x12, data = df, na.action = na.omit)

Coefficients:
(Intercept)           x1           x2           x3           x4  
 -1.456e+01   -8.337e-05   -5.070e-03   -3.452e-04   -5.945e-03  
         x5           x6           x7           x8           x9  
  1.573e-01    4.143e-01    8.837e-02   -8.740e-02   -3.271e-02  
        x10          x11          x12  
  1.884e-01   -2.184e-03    1.565e-03  


$`Location 2`

Call:
lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + 
    x10 + x11 + x12, data = df, na.action = na.omit)

Coefficients:
(Intercept)           x1           x2           x3           x4  
   4.514414    -0.026296    -0.003970     0.002311    -0.003639  
         x5           x6           x7           x8           x9  
  -0.201435    -0.045264    -0.023305    -0.096439     1.047886  
        x10          x11          x12  
   0.067007     0.009080    -0.006701  


attr(,"split_type")
[1] "data.frame"
attr(,"split_labels")
    Location
1 Location 1
2 Location 2

And then you can use ols_step_both_p like this:

ols_step_both_p(model$`Location 1`, pent = 0.05, prem = 0.1)
ols_step_both_p(model$`Location 2`, pent = 0.05, prem = 0.1)
Vishal A.
  • 1,373
  • 8
  • 19
  • I have done this. But if you use `model$mod[[1]]` and `model$mod[[2]]`, then you will find that the outputs are same. For more details you can visit [this](https://github.com/rsquaredacademy/olsrr/issues/192). – UseR10085 Jan 03 '22 at 06:24
  • That is happening because, if you see `model$mod[[1]]` or `model$mod[[2]]` output, data is listed as `.` inside `Call`. – Vishal A. Jan 03 '22 at 06:54
  • One possible way would be to filter the data and create separate `df` for `Location1` and `Location2`. – Vishal A. Jan 03 '22 at 06:55
  • Separately that can be done. But I have many locations. So, I wanted to automate it. – UseR10085 Jan 03 '22 at 07:23
  • I have edited my answer. Please check if it solves your problem. – Vishal A. Jan 03 '22 at 08:23
  • Have you chacked the output of ```ols_step_both_p(model$`Location 1`, pent = 0.05, prem = 0.1)``` and ```ols_step_both_p(model$`Location 2`, pent = 0.05, prem = 0.1)```? I am getting same output. – UseR10085 Jan 03 '22 at 09:14
  • I'm pretty sure that `C(p)` values are different – Vishal A. Jan 03 '22 at 09:52
  • 1
    I have reported the problem to the developer of `olsrr` package. He has solved it. Using the development version from GitHub solves the problem. – UseR10085 Jan 06 '22 at 05:18
  • Alright! Way to go! – Vishal A. Jan 06 '22 at 07:06