two break points in two different groups in R

Question

I would like to get the slopes of the two regression lines in each group (a total of four regression lines) separated by break points that can be random (see example).

After that, is it possible to compare them with a statistical test (unpaired t-test)?

Thank you!

Here is the code for the plot:

library(dplyr)
library(ggplot2)
library(ggpubr)

# Scatter plot of VO2 against percentage of power with one linear fit and one
# regression-equation label per group.
df %>%
  # Rescale by 1000; the equation labels below are placed on this scale
  # (label.y = 2000 and 3000).
  mutate(absVO2 = absVO2 * 1000) %>%
  filter(percent_power < 100) %>%
  ggplot(aes(x = percent_power, y = absVO2, color = group)) +
  geom_point(size = 1) +
  geom_smooth(method = "lm", formula = y ~ x) +
  stat_regline_equation(
    label.x = 30, label.y = c(2000, 3000),
    formula = y ~ x,
    # after_stat() replaces the `..var..` notation deprecated in ggplot2 3.4.0.
    aes(
      color = group,
      label = paste(after_stat(eq.label), after_stat(adj.rr.label),
                    sep = "~~~~")
    )
  ) +
  xlab("Percentage of power (%)") +
  # absVO2 was multiplied by 1000 above, so the unit is mL/min rather than
  # L/min (assumes the raw column is in L/min -- TODO confirm). Note that the
  # theme() call below hides this title via axis.title.y = element_blank().
  ylab(expression(paste("V", O[2], " (mL/min)"))) +
  # NOTE(review): scale limits discard observations with percent_power < 25
  # before the fits are computed; use coord_cartesian(xlim = c(25, 100)) if
  # those points should still contribute to the regression lines.
  scale_x_continuous(limits = c(25, 100), breaks = seq(25, 100, by = 25)) +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "white", colour = "white"),
    axis.line = element_line(colour = "black"),
    legend.title = element_text(face = "bold"),
    legend.position = "none",
    axis.title.y = element_blank(),
    strip.text = element_blank()
  )

Here are my data:

# Example data (dput output). It is assigned to `df` so that the plotting and
# modelling code, which refers to `df`, can run as-is; without the assignment
# `df` would resolve to the stats::df() density function.
# Columns: power, absVO2, percent_power, group ("CAD" / "Healthy"); 112 rows.
df <- structure(list(power = c(25L, 40L, 55L, 70L, 85L, 100L, 115L, 
25L, 40L, 55L, 70L, 85L, 100L, 115L, 130L, 25L, 40L, 55L, 70L, 
85L, 100L, 25L, 40L, 55L, 70L, 85L, 100L, 115L, 130L, 145L, 160L, 
175L, 190L, 20L, 30L, 40L, 50L, 60L, 70L, 80L, 90L, 20L, 30L, 
40L, 50L, 60L, 70L, 80L, 90L, 100L, 110L, 120L, 130L, 25L, 40L, 
55L, 70L, 85L, 100L, 115L, 25L, 40L, 55L, 70L, 85L, 25L, 40L, 
55L, 70L, 85L, 100L, 115L, 130L, 145L, 160L, 175L, 25L, 40L, 
55L, 70L, 85L, 100L, 115L, 130L, 145L, 160L, 175L, 190L, 205L, 
20L, 30L, 40L, 50L, 60L, 70L, 80L, 90L, 50L, 60L, 70L, 80L, 90L, 
100L, 110L, 120L, 20L, 30L, 40L, 50L, 60L, 70L, 80L), absVO2 = c(0.739, 
0.81975, 0.95125, 1.07525, 1.199, 1.34575, 1.49775, 0.66125, 
0.7485, 0.855, 0.9845, 1.1495, 1.3465, 1.451, 1.5985, 0.61675, 
0.717, 0.86275, 0.98575, 1.13, 1.262, 0.8835, 0.94575, 1.08125, 
1.244, 1.30475, 1.4735, 1.679, 1.79075, 1.96, 2.0405, 2.34425, 
2.4435, 0.5925, 0.661, 0.7435, 0.87875, 0.9435, 0.99675, 1.11425, 
1.20275, 0.9255, 1.02925, 1.11675, 1.19725, 1.24775, 1.42625, 
1.54225, 1.59425, 1.69675, 1.776, 1.94525, 2.0395, 0.535, 0.5845, 
0.76875, 0.982, 1.09975, 1.259, 1.3265, 0.7205, 0.86825, 0.9235, 
1.01075, 1.17275, 0.7435, 0.76575, 0.96075, 1.0975, 1.21975, 
1.34525, 1.50625, 1.6755, 1.86325, 2.0465, 2.1395, 0.613, 0.85525, 
0.98, 1.076, 1.287, 1.4615, 1.59325, 1.6965, 1.884, 1.998, 2.1425, 
2.31275, 2.474, 0.81075, 0.8035, 0.8645, 1.076, 1.119, 1.255, 
1.33825, 1.499, 0.8105, 0.8865, 1.03725, 1.134, 1.26675, 1.36275, 
1.4455, 1.52875, 0.64675, 0.6645, 0.742, 0.81675, 0.983, 1.06875, 
1.155), percent_power = c(21.7391304347826, 34.7826086956522, 
47.8260869565217, 60.8695652173913, 73.9130434782609, 86.9565217391304, 
100, 19.2307692307692, 30.7692307692308, 42.3076923076923, 53.8461538461538, 
65.3846153846154, 76.9230769230769, 88.4615384615385, 100, 25, 
40, 55, 70, 85, 100, 13.1578947368421, 21.0526315789474, 28.9473684210526, 
36.8421052631579, 44.7368421052632, 52.6315789473684, 60.5263157894737, 
68.4210526315789, 76.3157894736842, 84.2105263157895, 92.1052631578947, 
100, 22.2222222222222, 33.3333333333333, 44.4444444444444, 55.5555555555556, 
66.6666666666667, 77.7777777777778, 88.8888888888889, 100, 15.3846153846154, 
23.0769230769231, 30.7692307692308, 38.4615384615385, 46.1538461538462, 
53.8461538461538, 61.5384615384615, 69.2307692307692, 76.9230769230769, 
84.6153846153846, 92.3076923076923, 100, 21.7391304347826, 34.7826086956522, 
47.8260869565217, 60.8695652173913, 73.9130434782609, 86.9565217391304, 
100, 29.4117647058824, 47.0588235294118, 64.7058823529412, 82.3529411764706, 
100, 14.2857142857143, 22.8571428571429, 31.4285714285714, 40, 
48.5714285714286, 57.1428571428571, 65.7142857142857, 74.2857142857143, 
82.8571428571429, 91.4285714285714, 100, 12.1951219512195, 19.5121951219512, 
26.8292682926829, 34.1463414634146, 41.4634146341463, 48.780487804878, 
56.0975609756098, 63.4146341463415, 70.7317073170732, 78.0487804878049, 
85.3658536585366, 92.6829268292683, 100, 22.2222222222222, 33.3333333333333, 
44.4444444444444, 55.5555555555556, 66.6666666666667, 77.7777777777778, 
88.8888888888889, 100, 41.6666666666667, 50, 58.3333333333333, 
66.6666666666667, 75, 83.3333333333333, 91.6666666666667, 100, 
25, 37.5, 50, 62.5, 75, 87.5, 100), group = c("CAD", "CAD", "CAD", 
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", 
"CAD", "CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", 
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", 
"CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD", "CAD", "CAD", 
"CAD", "CAD", "Healthy", "Healthy", "Healthy", "Healthy", "Healthy", 
"Healthy", "Healthy", "Healthy", "CAD", "CAD", "CAD", "CAD", 
"CAD", "CAD", "CAD")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -112L))

What I would expect:

score 0 · Accepted Answer · answered Feb 06 '23 at 10:15

You will increase your likelihood of getting help if you include a full reprex of your code (notably all necessary library calls - I fixed that for you, but nobody really wants to search for which libraries are needed to run your code).

Anyways, what you need is to create a sub group and use that in your smoothing:

# Build the analysis data: keep rows below 100 % power, rescale absVO2 by
# 1000, and tag every row with a sub-group label determined by its group and
# by which side of that group's break point it falls on (47 for "CAD",
# 62 for every other group).
df_new <- df %>%
  filter(percent_power < 100) %>%
  mutate(
    absVO2 = absVO2 * 1000,
    sub_group = case_when(
      group == "CAD" & percent_power <= 47 ~ "CAD <= 47",
      group == "CAD" & percent_power > 47 ~ "CAD > 47",
      group != "CAD" & percent_power <= 62 ~ "Healthy <= 62",
      group != "CAD" & percent_power > 62 ~ "Healthy > 62"
    )
  )

Then you simply supply this sub group in your ggplot call:

# Scatter plot of VO2 against percentage of power with a separate linear fit
# and regression-equation label for each of the four sub-groups.
ggplot(df_new, aes(x = percent_power, y = absVO2, color = sub_group)) +
  geom_point(size = 1) +
  geom_smooth(method = "lm", formula = y ~ x) +
  stat_regline_equation(
    # One label.y entry per sub-group so the four equations do not overlap.
    label.x = 30, label.y = c(1500, 2000, 2500, 3000),
    formula = y ~ x,
    # after_stat() replaces the `..var..` notation deprecated in ggplot2 3.4.0.
    aes(
      color = sub_group,
      label = paste(after_stat(eq.label), after_stat(adj.rr.label),
                    sep = "~~~~")
    )
  ) +
  xlab("Percentage of power (%)") +
  # absVO2 in df_new has been multiplied by 1000, so the unit is mL/min rather
  # than L/min (assumes the raw column is in L/min -- TODO confirm).
  ylab(expression(paste("V", O[2], " (mL/min)"))) +
  # NOTE(review): scale limits discard observations with percent_power < 25
  # before the fits are computed, so the plotted lines are based on fewer
  # points than a model fitted to all of df_new; use
  # coord_cartesian(xlim = c(25, 100)) if those points should be included.
  scale_x_continuous(limits = c(25, 100), breaks = seq(25, 100, by = 25)) +
  scale_color_brewer(palette = "Paired") +
  theme_classic()

As for the test, using an interaction between sub_group and percent_power will allow for separate slopes. For the given data, this seems not to be significant:

# Linear model with a separate percent_power slope for each sub-group. The
# formula contains no sub_group main effect, so all sub-groups share a single
# intercept and only the slopes are allowed to differ.
slope_model <- lm(
  formula = absVO2 ~ percent_power + sub_group:percent_power,
  data = df_new
)
summary(slope_model)

# Call:
# lm(formula = absVO2 ~ percent_power + sub_group:percent_power, 
#     data = df_new)

# Residuals:
#    Min     1Q Median     3Q    Max 
# -539.7 -214.3 -103.0  255.2  668.0 

# Coefficients:
#                                      Estimate Std. Error t value Pr(>|t|)    
# (Intercept)                          487.4348   119.8238   4.068 9.86e-05 ***
# percent_power                         12.5709     4.2428   2.963  0.00386 ** 
# percent_power:sub_groupCAD > 47       -0.4004     3.0855  -0.130  0.89702    
# percent_power:sub_groupHealthy <= 62  -0.5094     2.7561  -0.185  0.85376    
# percent_power:sub_groupHealthy > 62    0.3367     3.1741   0.106  0.91575    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# Residual standard error: 307.4 on 94 degrees of freedom
# Multiple R-squared:  0.4928,    Adjusted R-squared:  0.4712 
# F-statistic: 22.83 on 4 and 94 DF,  p-value: 3.36e-13

two break points in two different groups in R

1 Answers1