Without data and a reproducible example, it is hard to help you, but here's an example with simulated data. First, create a fake dataset, called data
:
library(tidyverse)
make_df <- function(y_i) {
data_frame(y_var = y_i, y_i = rnorm(100),
x1 = rnorm(100), x2 = rnorm(100), x3 = rnorm(100),
cv1 = runif(100), cv2 = runif(100), cv3 = runif(100))
}
ys <- paste0("Y_", sprintf("%02d", 1:10))
ys
#> [1] "Y_01" "Y_02" "Y_03" "Y_04" "Y_05" "Y_06" "Y_07" "Y_08" "Y_09" "Y_10"
data <-
ys %>%
map_dfr(make_df)
data
#> # A tibble: 1,000 x 8
#> y_var y_i x1 x2 x3 cv1 cv2 cv3
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Y_01 0.504 0.892 -0.806 -1.56 0.145 0.436 0.701
#> 2 Y_01 0.967 1.24 -1.19 0.920 0.866 0.00100 0.567
#> 3 Y_01 -0.824 -0.729 -0.0855 -1.06 0.0665 0.780 0.471
#> 4 Y_01 0.294 2.37 -0.514 -0.955 0.397 0.0462 0.209
#> 5 Y_01 -0.893 0.0298 0.0369 0.0787 0.640 0.709 0.0485
#> 6 Y_01 0.670 -0.347 1.56 2.11 0.843 0.542 0.793
#> 7 Y_01 -1.59 1.04 0.228 0.573 0.185 0.151 0.558
#> 8 Y_01 -2.04 0.289 -0.435 -0.113 0.833 0.0898 0.653
#> 9 Y_01 -0.637 0.818 -0.454 0.606 0.294 0.378 0.315
#> 10 Y_01 -1.61 -0.628 -2.75 1.06 0.353 0.0863 0.332
#> # ... with 990 more rows
At this point, you have options. One way is to use the group_by
%>% do(tidy(*))
recipe:
data %>%
gather(x_var, x_value, -c(y_var, y_i, cv1:cv3)) %>%
group_by(y_var, x_var) %>%
do(broom::tidy(lm(y_i ~ x_value + cv1 + cv2 + cv3, data = .)))
#> # A tibble: 150 x 7
#> # Groups: y_var, x_var [30]
#> y_var x_var term estimate std.error statistic p.value
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 Y_01 x1 (Intercept) -0.111 0.344 -0.324 0.747
#> 2 Y_01 x1 x_value -0.0440 0.111 -0.396 0.693
#> 3 Y_01 x1 cv1 0.286 0.372 0.769 0.444
#> 4 Y_01 x1 cv2 0.0605 0.379 0.160 0.873
#> 5 Y_01 x1 cv3 -0.0690 0.378 -0.182 0.856
#> 6 Y_01 x2 (Intercept) -0.146 0.336 -0.434 0.665
#> 7 Y_01 x2 x_value 0.117 0.105 1.12 0.265
#> 8 Y_01 x2 cv1 0.287 0.362 0.793 0.430
#> 9 Y_01 x2 cv2 0.0564 0.376 0.150 0.881
#> 10 Y_01 x2 cv3 0.0125 0.379 0.0330 0.974
#> # ... with 140 more rows
Another approach is to use a split
variable and then a map
function from purrr
:
data %>%
gather(x_var, x_value, -c(y_var, y_i, cv1:cv3)) %>%
mutate(y_var_x_var = paste0(y_var, x_var)) %>%
split(.$y_var_x_var) %>%
map(~ lm(y_i ~ x_value + cv1 + cv2 + cv3, data = .))
#> $Y_01x1
#>
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#>
#> Coefficients:
#> (Intercept) x_value cv1 cv2 cv3
#> -0.11144 -0.04396 0.28585 0.06051 -0.06896
#>
#>
#> $Y_01x2
#>
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#>
#> Coefficients:
#> (Intercept) x_value cv1 cv2 cv3
#> -0.14562 0.11732 0.28726 0.05642 0.01249
#>
#>
# ...and so on...
#>
#>
#> $Y_10x2
#>
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#>
#> Coefficients:
#> (Intercept) x_value cv1 cv2 cv3
#> -0.45689 -0.02530 0.61375 0.34377 -0.02357
#>
#>
#> $Y_10x3
#>
#> Call:
#> lm(formula = y_i ~ x_value + cv1 + cv2 + cv3, data = .)
#>
#> Coefficients:
#> (Intercept) x_value cv1 cv2 cv3
#> -0.44423 -0.18377 0.64739 0.27688 -0.02013