0

I have a data frame:

m.All_Tissues <- structure(list(Sample = c("1: FL_643", "2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654", "9: cKO_655", "10: cKO_657", "1: FL_643", "2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654", "9: cKO_655", "10: cKO_657", "1: FL_643", "2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", "7: cKO_646", "8: cKO_654", "9: cKO_655", "10: cKO_657"), Genotype = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), .Label = c("miR-15/16 FL", "miR-15/16 cKO"), class = "factor"), Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("iLN", "Spleen", "Skin"), class = "factor"), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Cells/SC/Live/CD8—,, CD4+,Freq. of Parent", class = "factor"), 
value = c(41.2, 35.5, 39.5, 33.2, 39.1, 35.5, 35.7, 33.9, 
39.7, 42.4, 23.3, 18.4, 20.9, 15.5, 19, 20.5, 22.5, 21.8, 
23.8, 24.6, 28.8, 16.9, 21.4, 19.5, 25.4, 27.1, 31.3, 28.8, 
52.8, 19)), .Names = c("Sample", "Genotype", "Tissue", "variable", "value"), row.names = 101:130, class = "data.frame")

and I would like to use a function call where I can specify objects as arguments before the objects are created to make the function more flexible.

What I have working is this:

      library(dplyr)
      library(ggplot2)          

      plot_it <- function(subsets = NULL,
                      row_add = NULL) {
    temp <- droplevels(m.All_Tissues[m.All_Tissues$Tissue %in% subsets,])
    rownames(temp) <- NULL
    df <- droplevels(temp[c(row_add),])
    rownames(df) <- NULL

    color.groups <- c("black","red")
    names(color.groups) <- unique(df$Genotype)
    shape.groups <- c(16, 1)
    names(shape.groups) <- unique(df$Genotype)

    dmax = df %>% group_by("Tissue") %>%
      summarise(value = max(value, na.rm = TRUE),
               Genotype = NA)

    ggplot(df, aes(x = Tissue, y = value, color = Genotype, shape = Genotype)) +
      geom_boxplot(position = position_dodge(width = 0.75)) +
      scale_color_manual(values = color.groups) +
      scale_shape_manual(values = shape.groups)
  }
plot_it(subsets = c("Spleen", "iLN"), row_add = c(1:20))

enter image description here But I would like something that looks more like this:

plot_it <- function(data.set = NULL,
                subsets = NULL,
                group.by = NULL,
                comparison = NULL,
                row_add = NULL) {
    temp <- droplevels(data.set[data.set$group.by %in% subsets,])
    rownames(temp) <- NULL
    df <- droplevels(temp[c(row_add),])

    color.groups <- c("black","red")
    names(color.groups) <- unique(df$comparison)
    shape.groups <- c(16, 1)
    names(shape.groups) <- unique(df$comparison)

    dmax = df %>% group_by(group.by) %>%
        sumarise(value = max(value, na.rm = TRUE),
                 comparison = NA)

    ggplot(df, aes(x = group.by, y = value, color = comparison, shape = comparison)) +
      geom_boxplot(position = position_dodge(width = 0.75)) +
      scale_color_manual(values = color.groups) +
      scale_shape_manual(values = shape.groups)
}
plot_it(data.set = m.All_Tissues, subsets = c("Spleen", "iLN"), group.by = "Tissue", comparison = "Genotype", row_add = c(1:20))

I'm not sure how to go about this and would really appreciate any help or pointers in the right direction!

pogibas
  • 27,303
  • 19
  • 84
  • 117
John Gagnon
  • 825
  • 1
  • 8
  • 20
  • 1
    You might be looking for `aes_string`. You'd use that in place `aes` in your function. All variables must be strings in `aes_string`, so you'd use `y = "value"` instead of `y = value`. – aosmith Sep 29 '17 at 23:49
  • But you'd need a slightly different approach for the *dplyr* part of the function. See the [programming vignette](https://cran.r-project.org/web/packages/dplyr/vignettes/programming.html) and [this answer](https://stackoverflow.com/a/44122936/2461552) – aosmith Sep 29 '17 at 23:53

1 Answers1

2

I renamed some arguments; removed unnecessary parts; added libraries.
But the most important part is that I call columns from data frames using dataSet[, groupBy] and in ggplot2 I use color = get(comparison) (because of that you have to specify axis and color/shape names).

plot_it <- function(dataSet, subsets, groupBy, comparison, rowAdd) {
    library(dplyr)
    library(ggplot2)

    temp <- dataSet[dataSet[, groupBy] %in% subsets, ]
    rownames(temp) <- NULL
    df <- temp[rowAdd, ]

    color.groups <- c("black","red")
    names(color.groups) <- unique(df[, comparison])
    shape.groups <- c(16, 1)
    names(shape.groups) <- unique(df[, comparison])

    # dmax <- df %>% 
    #     group_by_(groupBy) %>%
    #     summarise(value = max(value, na.rm = TRUE),
    #              comparison = NA)

    ggplot(df, aes(get(groupBy), value, 
                   color = get(comparison), shape = get(comparison))) +
      geom_boxplot(position = position_dodge(width = 0.75)) +
      labs(x = groupBy,
           color = comparison,
           shape = comparison) +
      scale_color_manual(values = color.groups) +
      scale_shape_manual(values = shape.groups)
}

plot_it(m.All_Tissues, c("Spleen", "iLN"), "Tissue", "Genotype", 1:20)

dmax part in your function does nothing

pogibas
  • 27,303
  • 19
  • 84
  • 117
  • Thanks! This works very well. The dmax part is actually important for generating line segments positioned based on the maximum value of each plotted `Tissue`. I'm still having trouble getting the dmax assign to work properly by using `group_by(get(groupBy)` as it results in the column name being reassigned as "get(groupBy)" rather than "Tissue". Any suggestion for this part? – John Gagnon Oct 01 '17 at 22:49