I have data from multiple Sample Groups (two of which are included in my code) that show a value/FoS over a timespan/timepoints. The best model I found is a quartic polynomial function. I want to calculate at which Timepoint the curve from the regression reaches FoS=0.01. In the best case, I also want a confidence interval for this value.
I tried to implement resolutions from other posts on this forum but if I got them working (I am relatively new to R) the calculated values didn't match the curves I plotted.
This is my code for the plot & one of the methods I tried. Here the result is 13 for both but I would according to graph1 expect for B1 approximately 4 and for H1 7:
library(tidyverse)
library(ggplot2)
#Inpt Data
timepoint <- c(0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 7, 7, 7, 10, 10, 14, 14, 14, 21, 21, 21, 28, 28, 28, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 7, 7, 7, 10, 10, 10, 14, 14, 14, 21, 21, 21, 28, 28, 28)
FoS <- c(1.000000e+00, 1.000000e+00, 1.000000e+00, 4.285714e-01, 2.826923e-01, 6.956522e-01, 1.571429e-01, 1.384615e-01, 1.782609e-01, 7.619048e-02, 1.076923e-01, 1.043478e-01, 1.071429e-04, 5.769231e-04, 3.478261e-04, 4.761905e-05, 2.307692e-04, 1.726190e-03, 1.730769e-03, 1.217391e-02, 2.642857e-01, 1.653846e-01, 3.608696e-01, 2.214286e+00, 3.846296e-01, 1.434803e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00, 3.463415e-01, 4.000000e-01, 5.555556e-01, 2.658537e-01, 2.481481e-01, 2.666667e-01, 3.292683e-01, 1.925926e-01, 2.500000e-01, 1.634146e-02, 2.814815e-02, 3.444444e-03, 1.219512e-02, 1.592593e-03, 2.000000e-03, 4.634146e-04, 1.222222e-03, 1.055556e-03, 3.170732e-04, 1.851852e-04, 6.666667e-04, 1.195122e-03, 3.392593e-04, 9.463333e-03)
SampleID <- c("B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05","H1_05", "H1_05","H1_05","H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05")
# Creating a data frame
df <- data.frame(Index = index, Timepoint = timepoint, FoS = FoS, SampleID = SampleID)
#Plot graph
graph1 <- df %>%
ggplot(aes(Timepoint,FoS, color=SampleID))+
scale_y_continuous(trans="log10")+
scale_x_continuous(breaks=seq(0,30,5))+
geom_smooth(method = lm, formula = y ~ poly(x, 4))+
geom_point()+
labs(y="Fraction of Survivors")+
geom_hline(aes(yintercept=0.01))
graph1
# Automate calculation of MDK99 for different sample groups using a fourth-degree polynomial function
sample_groups <- df %>%
group_by(SampleID) %>%
summarize(MDK99 = predict(lm(timepoint ~ poly(FoS, degree = 4), data = .), newdata = data.frame(FoS = 0.01)))
# Print the calculated MDK99 values for different sample groups
print(sample_groups)
Edit1 using approx():
I edited my code according to the suggestions by one and edited so that there are separate regressions for each Sample ID. Unfortunately, the results still differ from what I see in the plot. I get 7.5 for B1 and 27.8 for H1.
library(tidyverse)
library(ggplot2)
#Inpt Data
timepoint <- c(0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 7, 7, 7, 10, 10, 14, 14, 14, 21, 21, 21, 28, 28, 28, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 7, 7, 7, 10, 10, 10, 14, 14, 14, 21, 21, 21, 28, 28, 28)
FoS <- c(1.000000e+00, 1.000000e+00, 1.000000e+00, 4.285714e-01, 2.826923e-01, 6.956522e-01, 1.571429e-01, 1.384615e-01, 1.782609e-01, 7.619048e-02, 1.076923e-01, 1.043478e-01, 1.071429e-04, 5.769231e-04, 3.478261e-04, 4.761905e-05, 2.307692e-04, 1.726190e-03, 1.730769e-03, 1.217391e-02, 2.642857e-01, 1.653846e-01, 3.608696e-01, 2.214286e+00, 3.846296e-01, 1.434803e+00, 1.000000e+00, 1.000000e+00, 1.000000e+00, 3.463415e-01, 4.000000e-01, 5.555556e-01, 2.658537e-01, 2.481481e-01, 2.666667e-01, 3.292683e-01, 1.925926e-01, 2.500000e-01, 1.634146e-02, 2.814815e-02, 3.444444e-03, 1.219512e-02, 1.592593e-03, 2.000000e-03, 4.634146e-04, 1.222222e-03, 1.055556e-03, 3.170732e-04, 1.851852e-04, 6.666667e-04, 1.195122e-03, 3.392593e-04, 9.463333e-03)
SampleID <- c("B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05", "B1_05","H1_05", "H1_05","H1_05","H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05", "H1_05")
# Creating a data frame
df <- data.frame(Timepoint = timepoint, FoS = FoS, SampleID = SampleID)
# Print the data frame
print(df)
# Create an empty list to store regression models
regression_models <- list()
# Loop through each unique sample group and fit a regression model
unique_sample_ids <- unique(df$SampleID)
for (sample_id in unique_sample_ids) {
sample_data <- df %>% filter(SampleID == sample_id)
fit <- lm(FoS ~ poly(Timepoint, 4), data = sample_data)
regression_models[[sample_id]] <- fit
}
# Create an empty data frame to store MDK99 values
sample_groups <- data.frame(SampleID = character(0), MDK99 = numeric(0))
# Loop through each regression model to calculate MDK99
for (sample_id in unique_sample_ids) {
model <- regression_models[[sample_id]]
sample_data <- df %>% filter(SampleID == sample_id)
fitted_values <- predict(model, newdata = sample_data)
mdk99 <- approx(x = fitted_values, y = sample_data$Timepoint, xout = 0.01)$y
sample_groups <- rbind(sample_groups, data.frame(SampleID = sample_id, MDK99 = mdk99))
}
# Print the result
print(sample_groups)