0

Question

I am creating a custom function that takes a dataframe, wrangles it to a PCA, and then plots a PCA using factoextra::fviz_pca but I am having trouble coloring the groups of the PCA using the habillage argument. Or, what is the unknown object class of character that is the current bottleneck?

Data

library(tidyverse, stats, factoextra, FactoMineR)
test <- structure(list(sample = c("2-3", "4-2", "1-3", "4-3", "3-1", 
"1-2", "1-2", "4-3", "1-1", "4-2", "2-2", "1-2", "2-1", "1-3", 
"3-2", "1-2", "2-2", "2-2", "3-3", "1-1"), compound = c("2-Ethyl-5-methylpyrazine", 
"Benzaldehyde", "Acetic Acid", "Acetic Acid", "2-Pentylfuran", 
"Nonanoic acid", "alpha-Pinene", "Benzaldehyde", "2-Pentylfuran", 
"alpha-Pinene", "Linalool", "2-Pentylfuran", "2-Pentylfuran", 
"D-Limonene", "Hexanoic acid", "Benzaldehyde", "Acetic Acid", 
"2-Pentylfuran", "Linalool", "Methoxyacetone"), conc = c(38761, 
60293, 79800, 221687, 50454, 23236, 41678, 106749, 52317, 88502, 
118724, 34832, 62559, 1380089, 29133, 61118, 666771, 55145, 67546, 
37265), code = c("2", "4", "1", "4", "3", "1", "1", "4", "1", 
"4", "2", "1", "2", "1", "3", "1", "2", "2", "3", "1")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -20L))
> test <- df4 %>% ungroup %>% slice_sample(n=20) %>% dput
structure(list(sample = c("4-1", "4-1", "1-3", "1-3", "2-2", 
"1-1", "3-3", "3-2", "2-1", "3-3", "4-2", "2-2", "3-2", "4-3", 
"2-2", "3-3", "4-3", "1-2", "2-1", "1-3"), compound = c("D-Limonene", 
"Nonanal", "D-Limonene", "Nonanal", "alpha-Pinene", "2-Ethyl-5-methylpyrazine", 
"Nonanal", "Hexanoic acid", "Nonanoic acid", "D-Limonene", "Benzaldehyde", 
"Methoxyacetone", "Acetic Acid", "2-Pentylfuran", "Nonanoic acid", 
"Methoxyacetone", "Benzaldehyde", "Acetic Acid", "D-Limonene", 
"alpha-Pinene"), conc = c(857431, 26448, 1380089, 28883, 63246, 
28726, 19727, 29133, 48460, 802456, 60293, 38083, 117253, 74652, 
36791, 53665, 106749, 99287, 1019527, 123793), code = c("4", 
"4", "1", "1", "2", "1", "3", "3", "2", "3", "4", "2", "3", "4", 
"2", "3", "4", "1", "2", "1")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L))

Code

my_fviz <- function(df, plot_version, codes_to_plot_v) {
  df %>%
    filter(code %in% codes_to_plot_v) %>% ## I choose what codes I want to plot
    arrange(code) %>%
    group_by(sample, compound) %>%
    pivot_wider(
      names_from = compound,
      values_from = conc,
      values_fill = 0
    ) %>%
    # mutate(across(sample:code, ~as.factor(.))) %>%
    ungroup() %>%
    column_to_rownames(var = "sample") %>%
    #Recode codes to keys for viz labels, and consistent colors for each code
    mutate(code = as.factor(
      recode(
        code,
        `1` = "Key 1 ",
        #1B9E77
        `2` = "Key 2",
        #D95F02
        `3` = "Key 3",
        #7570B3
        `4` = "Key 4",
      )
    )) %>%   #E7298A
    select_if(~ is.numeric(.)) %>%
    prcomp(center = T, scale = T) %>%
    assign(paste0("a", plot_version, "_pca"), ., envir = .GlobalEnv)
   
fviz_pca(
      paste0("a", plot_version, "_pca"),
      # ind.sup = df$code,
      addEllipses = T,
      ellipse.level = 0.95,
      ellipse.type = "confidence",
      habillage = df$code,
      invisible = c("var"),
      label = "",
      repel = T,
      title = "",
      palette = Accent,
      legend.title = ""
    ) + theme(legend.text = element_text(size = 12))
}

I continue getting the error which I can not interpret, because the dataframe has a class prcomp, so I suspect it is from my habillage pointing.

Error in .get_facto_class(X) :
An object of class : character can't be handled by factoextra
6.
stop("An object of class : ", class(X), " can't be handled by factoextra")
5.
.get_facto_class(X)
4.
facto_summarize(X, element = "var", result = c("coord", "contrib",
"cos2"), axes = axes)
3.
fviz_pca_biplot(X, ...)
2.
fviz_pca(paste0("a", plot_version, "_pca"), ind.sup = df$code,
addEllipses = T, ellipse.level = 0.95, ellipse.type = "confidence",
habillage = df$code, invisible = c("var"), label = "", repel = T,
title = "", palette = Accent, legend.title = "")
1.
naepca_fviz(test, 25, c(1, 2))

Question

How can I get the function to output a PCA grouped and colored by the original code column?

Desired Output

> my_fviz(test, 2.1, c(1,2))
``` plots a grouped an colored PCA
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
user7264
  • 123
  • 8

0 Answers0