I am trying to create a polynomial equation by plotting known depths from two cores against each other on the x- and y-axes. In theory, this means that I can enter a depth from one core into the equation to get the corresponding depth in the other. Basically, I am trying to correlate the two cores as closely as possible.
However, I am finding that my output values deviate significantly (when I enter a depth whose counterpart I already know, the result is vastly different). I am also concerned about the high R² values.
The main questions I have are as follows: 1. Is this issue due to my lack of understanding of statistics, or to a fault in my code? 2. Is what I want to achieve even possible? 3. Will I just have to accept the seemingly large margins of error?
Any help or suggestions would be greatly appreciated. I have battled with this on my own, to no avail, for too long now.
library(ggplot2)
library(tidyverse)
library(cowplot)
# NOTE(review): setwd() with an absolute, machine-specific path prevents this
# script from running on any other machine. It is kept here so that relative
# paths resolve exactly as before; consider an RStudio project or file.path().
setwd("/Users/jakobparrish/Dropbox/Jakob/2019/Lake Nganoke-Thesis Prep/Core Work/Hyperspectral-Chlorophyl 'A'/Graphs R/Chlorophyll A")

# Read the correlation table once. SPEC and Correlations are both loaded from
# "Correlations.csv", so the second name is bound to the same data instead of
# parsing the file a second time.
Correlations <- read_csv("Correlations.csv")
SPEC <- Correlations
# Build a plotmath label showing a fitted polynomial equation and its R^2.
#
# Fits `y ~ poly(x, degree, raw = raw)` to `df` with lm() and returns a single
# string intended for annotate("text", ..., parse = TRUE).
#
# Args:
#   df:     data frame with numeric columns `x` and `y`.
#   degree: degree of the polynomial passed to poly().
#   raw:    forwarded to poly(). The printed "italic(x)^k" terms only
#           correspond to the coefficients when raw = TRUE.
#   digits: number of SIGNIFICANT digits kept for each coefficient.
#
# Returns:
#   A character string of the form
#   "italic(y) == b0 + b1*italic(x) + b2*italic(x)^2 ...*','~italic(r)^2==R2".
#
# Why signif() and not round(): round(cf, 5) keeps five DECIMAL PLACES, which
# truncates (or zeroes) the small high-order coefficients of a raw polynomial.
# With x around 90, an error of 5e-6 in the x^4 coefficient already shifts the
# result by several hundred units, so values computed from the printed
# equation were far from the fitted curve. Even with significant digits the
# label is for display; for real conversions call predict() on the lm object.
lm_eqn <- function(df, degree, raw = TRUE, digits = 5) {
  m <- lm(y ~ poly(x, degree, raw = raw), df) # get the fit

  # Keep `digits` significant figures so tiny high-order terms survive.
  cf <- signif(coef(m), digits)
  r2 <- round(summary(m)$r.squared, 5) # R^2 lies in [0, 1]; decimals are fine

  # One exponent suffix per non-intercept term; x^1 is written as plain x.
  powers <- paste0("^", seq_len(length(cf) - 1L))
  powers[1] <- ""

  # Prefix each term with its sign, print the magnitude, then the x^power
  # factor, and collapse all terms into one string.
  pcf <- paste0(
    ifelse(sign(cf[-1]) == 1, " + ", " - "),
    abs(cf[-1]),
    paste0("*italic(x)", powers),
    collapse = ""
  )

  # Intercept, polynomial terms, a literal comma, then the R^2 statement.
  paste0("italic(y) == ", cf[1], pcf, "*','", "~italic(r)^2==", r2)
}
###############################
# Core-to-core depth correlation plots.
# Both panels use the same recipe and differ only in the core compared with
# LC3U and in the polynomial degree, so the shared steps live in two helpers.

# Pair LC3U depths (x) with another core's depths (y) and drop rows in which
# either depth is missing.
make_core_pairs <- function(other_core) {
  na.omit(data.frame("x" = Correlations$LC3U, "y" = other_core))
}

# Scatter plot of `df` (columns x, y) with a raw polynomial lm fit of the
# given degree and the fitted equation from lm_eqn() printed at (10, 10).
# `y_core` is the name of the core on the y-axis, used in the axis label and
# in the title.
plot_core_correlation <- function(df, degree, y_core) {
  # The smoothing formula is evaluated when the plot is drawn, so make sure
  # `degree` is already evaluated in this function's environment.
  force(degree)
  depth_breaks <- seq(0, 90, by = 10) # tick every 10 cm on both axes

  ggplot(df, aes(x = x, y = y)) +
    geom_point() +
    labs(
      x = "LC3U [cm]",
      y = paste0(y_core, " [cm]"),
      title = paste0("Core Correlations of Lake Nganoke ", y_core, " & LC3U")
    ) +
    stat_smooth(
      method = "lm",
      formula = y ~ poly(x, degree, raw = TRUE),
      size = 1
    ) +
    annotate(
      "text",
      x = 10, y = 10,
      label = lm_eqn(df, degree, raw = TRUE),
      hjust = 0, family = "Times", parse = TRUE
    ) +
    scale_y_continuous(breaks = depth_breaks) +
    scale_x_continuous(breaks = depth_breaks) +
    expand_limits(y = c(10, 90), x = c(10, 90)) +
    theme_classic()
}

###############################
# Plots LC1U vs LC3U (degree-2 fit)
df1 <- make_core_pairs(Correlations$LC1U)
p1v3 <- plot_core_correlation(df1, degree = 2, y_core = "LC1U")
p1v3

###############################
# Plots LC2U vs LC3U (degree-4 fit)
# NOTE(review): a degree-4 polynomial can swing sharply outside the range of
# the tie points; treat depths converted beyond that range with caution.
df2 <- make_core_pairs(Correlations$LC2U)
p2v3 <- plot_core_correlation(df2, degree = 4, y_core = "LC2U")
p2v3

#################################
# Plots both panels side by side, labelled A and B
P_Correlations <- plot_grid(p1v3, p2v3, labels = "AUTO")
P_Correlations