0

I am trying to follow examples from R for Data Science and other SO questions, including here. For each group I want the linear regression coefficient for `int`, while retaining the other columns and keeping the option of extracting other items from the regression model.

This is a sample of my data:

# Sample data in dput() format, bound to `df` so that the snippets below,
# which read `df`, can be run as-is.
# Columns: PAV (numeric), egfr (numeric), Sex (character), int (numeric),
# baseline (POSIXct, tzone "UTC"). 28 rows.
df <- structure(list(PAV = c(291, 291, 291, 291, 291, 291, 213, 213, 
213, 213, 213, 213, 213, 213, 213, 153, 153, 153, 153, 153, 153, 
24, 24, 291, 291, 153, 153, 24), egfr = c(17.9482589517147, 19.4030371520903, 
15.3900891409336, 16.8136023145106, 16.4108170435046, 14.751117054461, 
18.2612036808828, 16.4726251956924, 15.812571386672, 19.9934832041554, 
18.6220264300866, 16.9502383756366, 16.1600606239007, 15.5059688309318, 
16.2950166774761, 40.4200208875354, 43.1658192315661, 40.5857301899204, 
47.578413903664, 41.8014497433408, 35.9122138362589, 89.4824545556388, 
89.3963891514983, 13.8301296793776, 12.4861579230554, 38.1476553139947, 
36.833890692659, 77.0986172754467), Sex = c("M", "M", "M", "M", 
"M", "M", "F", "F", "F", "F", "F", "F", "F", "F", "F", "M", "M", 
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M"), int = c(0, 
0.0931506849315069, 0.419178082191781, 0.498630136986301, 0.553424657534247, 
0.70958904109589, 0, 0.163934426229508, 0.224043715846995, 0.344262295081967, 
0.431693989071038, 0.546448087431694, 0.707650273224044, 0.898907103825137, 
0.975409836065574, 0, 0.419178082191781, 0.476712328767123, 0.526027397260274, 
0.578082191780822, 0.717808219178082, 0, 0.136986301369863, 0.879452054794521, 
0.945205479452055, 1, 1, 0.747945205479452), baseline = structure(c(1618963200, 
1618963200, 1618963200, 1618963200, 1618963200, 1618963200, 1574899200, 
1574899200, 1574899200, 1574899200, 1574899200, 1574899200, 1574899200, 
1574899200, 1574899200, 1618876800, 1618876800, 1618876800, 1618876800, 
1618876800, 1618876800, 1634515200, 1634515200, 1618963200, 1618963200, 
1618876800, 1618876800, 1634515200), tzone = "UTC", class = c("POSIXct", 
"POSIXt"))), row.names = c(NA, -28L), class = c("tbl_df", "tbl", 
"data.frame"))

This fairly simple approach works, but it loses the other columns, and I get a warning that `do()` is deprecated:

# Fit one linear model of egfr on int per PAV group, then pull the `int`
# coefficient out of each fitted model into `decline`.
# Only PAV and the new columns survive do(); the other input columns are lost.
check <- df %>%
  group_by(PAV) %>%
  do(model = lm(egfr ~ int, data = .)) %>%
  mutate(decline = model$coefficients["int"])

This is another way I am trying:

# Per-PAV workflow that keeps the original columns:
#   1. n_test  - number of rows in each PAV group
#   2. nest()  - collapse each group's rows into the list-column `data`
#   3. model   - one lm(egfr ~ int) fit per nested data frame
#   4. unnest(data) - expand back to the original rows, carrying `model` along
newdf <- df %>%
  group_by(PAV) %>%
  mutate(n_test = n()) %>%
  nest() %>%
  mutate(model = map(data, ~ lm(egfr ~ int, data = .))) %>%
  unnest(data)

I then want to get the contents of the model list out. I have seen examples using `broom::tidy` (something like `mutate(tidy_model = list(tidy(model)))`), but I can't get it working.

Mark Davies
  • 787
  • 5
  • 18

0 Answers0