It's not perfectly clear how you want your output, but here's a suggested path that follows tidyr's nested (list-column) approach.
Make some fake data, following your methodology above:
# Work with the first five columns of mtcars: mpg (the response) plus four
# candidate predictors.
dat <- mtcars[, 1:5]

# Every column other than the response is a candidate predictor.
Cols <- setdiff(names(dat), "mpg")
n <- length(Cols)

# Enumerate every non-empty subset of predictor positions, smallest subsets
# first. seq_len(n) is used instead of 1:n so that zero predictors produces
# no subsets rather than iterating over c(1, 0).
id <- unlist(
  lapply(seq_len(n), function(i) combn(seq_len(n), i, simplify = FALSE)),
  recursive = FALSE
)
str(id)
# List of 15
# $ : int 1
# $ : int 2
# $ : int 3
# $ : int 4
# $ : int [1:2] 1 2
# $ : int [1:2] 1 3
# $ : int [1:2] 1 4
# $ : int [1:2] 2 3
# $ : int [1:2] 2 4
# $ : int [1:2] 3 4
# $ : int [1:3] 1 2 3
# $ : int [1:3] 1 2 4
# $ : int [1:3] 1 3 4
# $ : int [1:3] 2 3 4
# $ : int [1:4] 1 2 3 4
# Build one model formula string per predictor subset, e.g. "mpg ~ cyl + disp".
# vapply() always returns a character vector, even when `id` is empty, whereas
# sapply() would silently return an empty list in that case.
Formulas <- vapply(
  id,
  function(i) paste("mpg ~", paste(Cols[i], collapse = " + ")),
  character(1)
)
head(Formulas)
# [1] "mpg ~ cyl" "mpg ~ disp" "mpg ~ hp" "mpg ~ drat"
# [5] "mpg ~ cyl + disp" "mpg ~ cyl + hp"
This is where I diverge from your path.
library(dplyr)
library(tidyr)
library(purrr)
# Fit one linear model per formula, keeping each stage in its own list-column:
#   lms       - the fitted lm objects
#   summaries - summary() of each fit
#   coefs     - each coefficient table converted to a data.frame
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
x <- tibble(Formulas) %>%
  mutate(
    lms = map(Formulas, ~ lm(as.formula(.), data = dat)),
    summaries = map(lms, summary),
    coefs = map(summaries, ~ as.data.frame(coef(.)))
  )
x
# # A tibble: 15 × 4
# Formulas lms summaries coefs
# <chr> <list> <list> <list>
# 1 mpg ~ cyl <S3: lm> <S3: summary.lm> <data.frame [2 × 4]>
# 2 mpg ~ disp <S3: lm> <S3: summary.lm> <data.frame [2 × 4]>
# 3 mpg ~ hp <S3: lm> <S3: summary.lm> <data.frame [2 × 4]>
# 4 mpg ~ drat <S3: lm> <S3: summary.lm> <data.frame [2 × 4]>
# 5 mpg ~ cyl + disp <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 6 mpg ~ cyl + hp <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 7 mpg ~ cyl + drat <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 8 mpg ~ disp + hp <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 9 mpg ~ disp + drat <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 10 mpg ~ hp + drat <S3: lm> <S3: summary.lm> <data.frame [3 × 4]>
# 11 mpg ~ cyl + disp + hp <S3: lm> <S3: summary.lm> <data.frame [4 × 4]>
# 12 mpg ~ cyl + disp + drat <S3: lm> <S3: summary.lm> <data.frame [4 × 4]>
# 13 mpg ~ cyl + hp + drat <S3: lm> <S3: summary.lm> <data.frame [4 × 4]>
# 14 mpg ~ disp + hp + drat <S3: lm> <S3: summary.lm> <data.frame [4 × 4]>
# 15 mpg ~ cyl + disp + hp + drat <S3: lm> <S3: summary.lm> <data.frame [5 × 4]>
I did this piece-wise, keeping the models and the summaries, primarily for demonstration and in case you want to re-use the `lm` objects (perhaps for `predict`). If you know you will never need the raw `lm` output, you could combine the steps into a single function call.
I believe you are asking for a data.frame of the coefficients, in which case:
# Print the full summary of the first fitted model.
x[["summaries"]][[1]]
# Call:
# lm(formula = as.formula(.), data = dat)
# Residuals:
# Min 1Q Median 3Q Max
# -4.9814 -2.1185 0.2217 1.0717 7.5186
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 37.8846 2.0738 18.27 < 2e-16 ***
# cyl -2.8758 0.3224 -8.92 6.11e-10 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residual standard error: 3.206 on 30 degrees of freedom
# Multiple R-squared: 0.7262, Adjusted R-squared: 0.7171
# F-statistic: 79.56 on 1 and 30 DF, p-value: 6.113e-10
# Extract only the coefficient table from the first model's summary.
x[["summaries"]][[1]] %>% coef()
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 37.88458 2.0738436 18.267808 8.369155e-18
# cyl -2.87579 0.3224089 -8.919699 6.112687e-10
Unfortunately, if you try to combine all of these coefficient summaries into a single data.frame, the row names (i.e. the term names) are dropped by `dplyr::bind_rows`:
# Stack the coefficient tables of the first two models into one data frame.
x$summaries[1:2] %>%
  map(~ as.data.frame(coef(.))) %>%
  bind_rows()
# Estimate Std. Error t value Pr(>|t|)
# 1 37.88457649 2.073843606 18.267808 8.369155e-18
# 2 -2.87579014 0.322408883 -8.919699 6.112687e-10
# 3 29.59985476 1.229719515 24.070411 3.576586e-21
# 4 -0.04121512 0.004711833 -8.747152 9.380327e-10
One could always use base R, which keeps the row names, though the result still lacks the "which model" component:
# Base-R equivalent: row-bind the first two coefficient tables.
coef_tables <- lapply(x$summaries[1:2], function(s) as.data.frame(coef(s)))
do.call(rbind.data.frame, coef_tables)
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 37.88457649 2.073843606 18.267808 8.369155e-18
# cyl -2.87579014 0.322408883 -8.919699 6.112687e-10
# (Intercept)1 29.59985476 1.229719515 24.070411 3.576586e-21
# disp -0.04121512 0.004711833 -8.747152 9.380327e-10
We can keep the term names, and get the "which model" component alongside them, by using `tibble::rownames_to_column` in the original pipeline:
# Same pipeline as before, except that each coefficient table's row names
# (the model terms) are moved into a regular `rowname` column so they are
# kept when the tables are combined.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
x <- tibble(Formulas) %>%
  mutate(
    lms = map(Formulas, ~ lm(as.formula(.), data = dat)),
    summaries = map(lms, summary),
    coefs = map(summaries, ~ tibble::rownames_to_column(as.data.frame(coef(.))))
  )

# One row per (formula, term). The column to unnest is named explicitly
# because calling unnest() without columns is deprecated in tidyr >= 1.0.0.
x %>%
  select(Formulas, coefs) %>%
  unnest(coefs)
# # A tibble: 47 × 6
# Formulas rowname Estimate `Std. Error` `t value` `Pr(>|t|)`
# <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 mpg ~ cyl (Intercept) 37.88457649 2.073843606 18.267808 8.369155e-18
# 2 mpg ~ cyl cyl -2.87579014 0.322408883 -8.919699 6.112687e-10
# 3 mpg ~ disp (Intercept) 29.59985476 1.229719515 24.070411 3.576586e-21
# 4 mpg ~ disp disp -0.04121512 0.004711833 -8.747152 9.380327e-10
# 5 mpg ~ hp (Intercept) 30.09886054 1.633920950 18.421246 6.642736e-18
# 6 mpg ~ hp hp -0.06822828 0.010119304 -6.742389 1.787835e-07
# 7 mpg ~ drat (Intercept) -7.52461844 5.476662574 -1.373942 1.796391e-01
# 8 mpg ~ drat drat 7.67823260 1.506705108 5.096042 1.776240e-05
# 9 mpg ~ cyl + disp (Intercept) 34.66099474 2.547003876 13.608536 4.022869e-14
# 10 mpg ~ cyl + disp cyl -1.58727681 0.711844271 -2.229809 3.366495e-02
# # ... with 37 more rows