I hope you're doing well!
I'm trying to produce an R-squared value from my model fit in R, but the model fit itself is based on a specific function from a script. Unfortunately, I haven't been able to find a solution so far.
First things first, a list of the required packages:
library(ggplot2)
library(deSolve)
library(rootSolve)
library(coda)
library(minpack.lm)
library(FME)
library(phytotools)
The minimum reproducible data example:
kelppercent <- structure(list(Kelp_Cover = c(89.8, 98.5, 87.3, 8.733333333,
4.866666667, 6.733333333, 96.6, 71.26666667, 71.64, 3.2, 0, 85.66666667,
10.6, 0, 0, 100, 92.70333333, 74.03333333, 62.53333333, 25.23333333,
3.033333333, 11.9, 1.466666667, 2.2, 0, 96.8, 86.26666667, 43.26666667,
96.5, 7.633333333, 87.33333333, 83.56666667, 48.93333333, 0,
0, 0, 86.8, 74.9, 48.33333333, 4.766666667, 0, 0, 0, 0, 0, 0,
0, 61.63333333, 62.6, 23.53333333, 5.766666667, 3, 57.53333333,
80.3, 66.43333333, 45.16666667, 21.33333333, 4.866666667, 96.26666667,
0, 0, 5.033333333, 0, 59.93333333, 40.16666667, 9.633333333,
10.8, 0, 7.066666667, 43.6, 73.36666667, 2.766666667, 8.3, 31.33333333,
77.96666667, 11.46666667, 3.333333333, 98.66666667, 83.83333333,
0, 85.4, 2.766666667, 97.3, 79.7, 65.83333333, 4.233333333, 3.533333333,
99.23333333, 85.63333333, 74, 68.16666667, 18.86666667, 0, 66.6
), averagePAR = c(411.0328394, 415.1633577, 214.5924198, 57.33305713,
29.63469497, 15.31777983, 345.0506636, 148.232038, 63.6797416,
11.75221473, 5.048692624, 359.1309589, 32.10261553, 14.35388982,
6.417986499, 294.0376578, 107.6421231, 39.40592762, 14.42583152,
5.2810485, 1.933300914, 385.168096, 184.7042801, 88.57346033,
42.47469453, 415.1633577, 214.5924198, 110.9199687, 415.1633577,
57.33305713, 94.52133048, 11.12336013, 151.2262102, 1.009339599,
0.190038121, 0.035780313, 173.9490223, 80.95080298, 37.67214334,
17.53151707, 8.15865686, 160.5765179, 32.10261553, 6.417986499,
1.283090179, 0.256516652, 0.051283062, 210.3436414, 107.6421231,
55.08522426, 28.18953998, 14.42583152, 315.3555047, 123.8161165,
48.61316984, 19.08669362, 7.493892595, 2.942281535, 199.0882679,
3.031877918, 0.7515082, 32.10261553, 14.35388982, 182.8666314,
87.25490901, 41.63372557, 19.86555398, 9.478859492, 408.9830166,
106.0397056, 53.99457571, 27.49360902, 13.99952731, 89.91193692,
404.9139874, 26.15286547, 13.18434033, 396.8969763, 434.2704123,
20.06518531, 400.8854416, 24.87750414, 373.7858211, 80.95080298,
37.67214334, 17.53151707, 8.15865686, 360.9309232, 162.1901705,
72.8827865, 32.75106347, 14.71722213, 6.613422716, 408.9830166
)), row.names = c(NA, -94L), class = c("tbl_df", "tbl", "data.frame"
))
The fully functional script for the model function:
fitPGH <- function(x, #E
y, #Quantum Efficiency, rETR or P
normalize=FALSE, #Should curve be normalized to E (Default=TRUE for modeling Quantum Efficiency)
lowerlim=c(-Inf), #Lower bounds of parameter estimates (alpha,Beta,Ps)
upperlim=c(Inf), #Upper bounds of parameter estimates (alpha,Beta,Ps)
fitmethod=c("Nelder-Mead")) #Fitting method passed to modFit
{
#If normalize =T, assign E = 0 to very small number
if (normalize==T) x[x==0] <- 1e-9
#Remove NA values
ind <- is.finite(x) & is.finite(y)
res <- rep(NA,length(x))
x <- x[ind==T]
y <- y[ind==T]
#Intitial Parameter Estimates
if (normalize==T){
alpha <- max(y)
beta <- 0
ps <- max(x*y)
}
if (normalize==F){
PE <- y/x
alpha <- max(PE[is.finite(PE)])
beta <- 0
ps <- max(y)
}
#Load the model
PGH <- function(p,x) return(data.frame(x = x, y = p[3]*(1-exp(-1*p[1]*x/p[3]))*exp(-1*p[2]*x/p[3])))
PGH.E <- function(p,x) return(data.frame(x = x, y = p[3]*(1-exp(-1*p[1]*x/p[3]))*exp(-1*p[2]*x/p[3])/x))
if (normalize==F) model.1 <- function(p) (y - PGH(p, x)$y)
if (normalize==T) model.1 <- function(p) (y - PGH.E(p, x)$y)
#In case of non-convergence, NAs are returned
if (class(try(modFit(f = model.1,p = c(alpha,beta,ps),method = fitmethod,
lower=lowerlim,upper=upperlim,
hessian = TRUE),silent=T))=="try-error"){
fit <- list(alpha=NA,beta=NA,ps=NA,ssr=NA,residuals=rep(NA,c(length(x))))
}else{
fit <- modFit(f = model.1,p = c(alpha,beta,ps),method = fitmethod,
lower=lowerlim,upper=upperlim, hessian = TRUE)
fit <- list(alpha=summary(fit)$par[1,],beta=summary(fit)$par[2,],ps=summary(fit)$par[3,],
ssr=fit$ssr,residuals=fit$residuals,model="PGH",normalize=normalize)
}
return(fit)
}
And plotting the model fit onto the data:
PAR <- kelppercent$averagePAR
Pc <- kelppercent$Kelp_Cover
#Call function
myfit <- fitPGH(PAR, Pc)
#Plot input data
plot(PAR, Pc, xlim=c(0,450), ylim=c(0,100), xlab="PAR", ylab="Pc")
#Add model fit
E <- seq(0,450,by=1)
with(myfit,{
P <- ps[1]*(1-exp(-1*alpha[1]*E/ps[1]))*exp(-1*beta[1]*E/ps[1])
lines(E,P)
})
Plotting out the lines from E and the necessary P formula puts the fit onto the graph. Now, this is where I'm stuck: I want to generate the R-squared value to understand the relationship between the fit and the base values.
Thank you for your time, and any direction, pointers, or answers you may have would be incredible!