First define a small regfun
that computes the desired summary statistics. Then, using by
apply it group-wise. For the combination of two groups we may paste
the columns together use the interaction function :
for factors.
regfun <- function(x) summary(lm(w ~ h, x))$coe[2, c(1, 4)]
do.call(rbind, by(d, d$city, regfun))
# Estimate Pr(>|t|)
# a -0.1879530 0.4374580
# b -0.2143780 0.4674864
# c -0.2866948 0.5131854
do.call(rbind, by(d, d$diet, regfun))
# Estimate Pr(>|t|)
# y -0.1997162 0.3412652
# z -0.3512349 0.4312766
# do.call(rbind, by(d, Reduce(paste, d[1:2]), regfun))
with(d, do.call(rbind, by(d, city:diet, regfun))) ## credits to @G.Grothendieck
# Estimate Pr(>|t|)
# a y -0.2591764 0.5576043
# a z -0.1543536 0.8158689
# b y -0.1966501 0.7485405
# b z -0.4354839 0.7461538
# c y -0.5000000 0.3333333
# c z -1.0671642 0.7221495
Edit
If we have an unbalanced panel, i.e. with(d, city:diet)
gives "impossible" combinations that aren't actually in the data, we have to code this slightly different. You can think of by
as a combination of first split
then lapply
, so let's to that. Because we'll get errors, we may use tryCatch
to provide a similar substitute.
s <- with(d2, split(d2, city:diet))
do.call(rbind, lapply(s, function(x)
tryCatch(regfun(x),
error=function(e) cbind.data.frame(Estimate=NA, `Pr(>|t|)`=NA))))
# Estimate Pr(>|t|)
# a:y -0.2591764 0.5576043
# a:z NA NA
# b:y 5.2500000 NaN
# b:z NA NA
# c:y -0.5000000 0.3333333
# c:z 9.5000000 NaN
# d:y NA NA
# d:z 1.4285714 NaN
# e:y NA NA
# e:z -7.0000000 NaN
# f:y NA NA
# f:z 2.0000000 NaN
Data:
d <- structure(list(city = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("a",
"b", "c"), class = "factor"), diet = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("y",
"z"), class = "factor"), id = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), w = c(66L, 54L, 50L,
74L, 59L, 53L, 67L, 75L, 66L, 64L, 73L, 56L, 53L, 74L, 54L, 63L,
69L, 75L), h = c(152L, 190L, 174L, 176L, 185L, 186L, 180L, 194L,
154L, 169L, 183L, 177L, 189L, 152L, 182L, 191L, 173L, 179L)), out.attrs = list(
dim = c(city = 3L, diet = 2L, id = 3L), dimnames = list(city = c("city=a",
"city=b", "city=c"), diet = c("diet=y", "diet=z"), id = c("id=1",
"id=2", "id=3"))), row.names = c(NA, -18L), class = "data.frame")
d2 <- structure(list(city = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 1L,
2L, 3L, 4L, 5L, 3L, 1L, 6L, 3L, 6L, 2L, 3L), .Label = c("a",
"b", "c", "d", "e", "f"), class = "factor"), diet = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L,
2L), .Label = c("y", "z"), class = "factor"), id = c(1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L
), w = c(66L, 54L, 50L, 74L, 59L, 53L, 67L, 75L, 66L, 64L, 73L,
56L, 53L, 74L, 54L, 63L, 69L, 75L), h = c(152L, 190L, 174L, 176L,
185L, 186L, 180L, 194L, 154L, 169L, 183L, 177L, 189L, 152L, 182L,
191L, 173L, 179L)), out.attrs = list(dim = c(city = 3L, diet = 2L,
id = 3L), dimnames = list(city = c("city=a", "city=b", "city=c"
), diet = c("diet=y", "diet=z"), id = c("id=1", "id=2", "id=3"
))), row.names = c(NA, -18L), class = "data.frame")