Question
I am creating a custom function that takes a dataframe, wrangles it to a PCA, and then plots a PCA using factoextra::fviz_pca but I am having trouble coloring the groups of the PCA using the habillage
argument. Or, what is the unknown object class of character that is the current bottleneck?
Data
library(tidyverse, stats, factoextra, FactoMineR)
test <- structure(list(sample = c("2-3", "4-2", "1-3", "4-3", "3-1",
"1-2", "1-2", "4-3", "1-1", "4-2", "2-2", "1-2", "2-1", "1-3",
"3-2", "1-2", "2-2", "2-2", "3-3", "1-1"), compound = c("2-Ethyl-5-methylpyrazine",
"Benzaldehyde", "Acetic Acid", "Acetic Acid", "2-Pentylfuran",
"Nonanoic acid", "alpha-Pinene", "Benzaldehyde", "2-Pentylfuran",
"alpha-Pinene", "Linalool", "2-Pentylfuran", "2-Pentylfuran",
"D-Limonene", "Hexanoic acid", "Benzaldehyde", "Acetic Acid",
"2-Pentylfuran", "Linalool", "Methoxyacetone"), conc = c(38761,
60293, 79800, 221687, 50454, 23236, 41678, 106749, 52317, 88502,
118724, 34832, 62559, 1380089, 29133, 61118, 666771, 55145, 67546,
37265), code = c("2", "4", "1", "4", "3", "1", "1", "4", "1",
"4", "2", "1", "2", "1", "3", "1", "2", "2", "3", "1")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -20L))
> test <- df4 %>% ungroup %>% slice_sample(n=20) %>% dput
structure(list(sample = c("4-1", "4-1", "1-3", "1-3", "2-2",
"1-1", "3-3", "3-2", "2-1", "3-3", "4-2", "2-2", "3-2", "4-3",
"2-2", "3-3", "4-3", "1-2", "2-1", "1-3"), compound = c("D-Limonene",
"Nonanal", "D-Limonene", "Nonanal", "alpha-Pinene", "2-Ethyl-5-methylpyrazine",
"Nonanal", "Hexanoic acid", "Nonanoic acid", "D-Limonene", "Benzaldehyde",
"Methoxyacetone", "Acetic Acid", "2-Pentylfuran", "Nonanoic acid",
"Methoxyacetone", "Benzaldehyde", "Acetic Acid", "D-Limonene",
"alpha-Pinene"), conc = c(857431, 26448, 1380089, 28883, 63246,
28726, 19727, 29133, 48460, 802456, 60293, 38083, 117253, 74652,
36791, 53665, 106749, 99287, 1019527, 123793), code = c("4",
"4", "1", "1", "2", "1", "3", "3", "2", "3", "4", "2", "3", "4",
"2", "3", "4", "1", "2", "1")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L))
Code
my_fviz <- function(df, plot_version, codes_to_plot_v) {
df %>%
filter(code %in% codes_to_plot_v) %>% ## I choose what codes I want to plot
arrange(code) %>%
group_by(sample, compound) %>%
pivot_wider(
names_from = compound,
values_from = conc,
values_fill = 0
) %>%
# mutate(across(sample:code, ~as.factor(.))) %>%
ungroup() %>%
column_to_rownames(var = "sample") %>%
#Recode codes to keys for viz labels, and consistent colors for each code
mutate(code = as.factor(
recode(
code,
`1` = "Key 1 ",
#1B9E77
`2` = "Key 2",
#D95F02
`3` = "Key 3",
#7570B3
`4` = "Key 4",
)
)) %>% #E7298A
select_if(~ is.numeric(.)) %>%
prcomp(center = T, scale = T) %>%
assign(paste0("a", plot_version, "_pca"), ., envir = .GlobalEnv)
fviz_pca(
paste0("a", plot_version, "_pca"),
# ind.sup = df$code,
addEllipses = T,
ellipse.level = 0.95,
ellipse.type = "confidence",
habillage = df$code,
invisible = c("var"),
label = "",
repel = T,
title = "",
palette = Accent,
legend.title = ""
) + theme(legend.text = element_text(size = 12))
}
I continue getting the error which I can not interpret, because the dataframe has a class prcomp, so I suspect it is from my habillage pointing.
Error in .get_facto_class(X) :
An object of class : character can't be handled by factoextra
6.
stop("An object of class : ", class(X), " can't be handled by factoextra")
5.
.get_facto_class(X)
4.
facto_summarize(X, element = "var", result = c("coord", "contrib",
"cos2"), axes = axes)
3.
fviz_pca_biplot(X, ...)
2.
fviz_pca(paste0("a", plot_version, "_pca"), ind.sup = df$code,
addEllipses = T, ellipse.level = 0.95, ellipse.type = "confidence",
habillage = df$code, invisible = c("var"), label = "", repel = T,
title = "", palette = Accent, legend.title = "")
1.
naepca_fviz(test, 25, c(1, 2))
Question
How can I get the function to output a PCA grouped and colored by the original code
column?
Desired Output
> my_fviz(test, 2.1, c(1,2))
``` plots a grouped an colored PCA