2

I'd like to make a simple if/else condition in a dplyr function. I've looked at some helpful posts (e.g., How to parametrize function calls in dplyr 0.7?), but am still running into trouble.

Below is a toy example that works when I call the function without the grouping variable. The function then fails with the grouping variable.

# example dataset
# Ten-row toy data; repeated runs are spelled with rep() instead of literals.
test <- tibble(
  A = rep(1:5, times = 2),
  B = c(1, 2, 1, 2, rep(3, times = 6)),
  C = c(rep(1, times = 4), 2, 3, 4, 5, 4, 3)
)

# begin function, set default for group var to NULL.
# Summarise `df` into a table of row percentages.
#
# df:     data frame to summarise.
# column: unquoted column name, captured with enquo().
# group:  optional unquoted grouping column, captured with enquo();
#         defaults to NULL.
#
# Returns the summarised data with a `Percentages` column: one row per value
# of `group` when the first branch runs, otherwise one row per value of
# `column` (stored under the name `col_name`).
prop_tab <- function(df, column, group = NULL) {

  col_name <- enquo(column)
  group_name <- enquo(group)

  # if group_by var is NOT null, then...
  # NOTE: is.null(group) evaluates `group` as an ordinary R expression, so a
  # bare column name such as B is looked up as a variable rather than as a
  # column of `df` (see the error reported below the function).
  if(!is.null(group)) {
      temp <- df %>%
        select(!!col_name, !!group_name) %>% 
        group_by(!!group_name) %>% 
        # length() of the column within each group is that group's row count
        summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  } else {
  # if group_by var is null, then...
      temp <- df %>%
        select(!!col_name) %>% 
        # the grouping column is literally named `col_name` in the result
        group_by(col_name = !!col_name) %>% 
        summarise(Percentages = 100 * length(!!col_name) / nrow(df)) 

  }

  temp
}

test %>% prop_tab(column = C)  # works

test %>% prop_tab(column = A, group = B)  # fails
# Error in prop_tab(., column = A, group = B) : object 'B' not found
Daniel
  • 415
  • 1
  • 6
  • 16

3 Answers3

3

The problem here is that when you supply an unquoted column name, is.null() evaluates it like any other argument. So this code tries to check whether the object B is NULL and errors because B does not exist in that scope. Instead, you can use missing() to check whether an argument was supplied to the function, like so. There may be a cleaner way, but this at least works, as you can see at the bottom.

library(tidyverse)
# Toy data for the reprex: 10 rows, with repeated runs written via rep().
test <- tibble(
  A = rep(1:5, times = 2),
  B = c(1, 2, 1, 2, rep(3, times = 6)),
  C = c(rep(1, times = 4), 2, 3, 4, 5, 4, 3)
)

# Summarise `df` into a table of row percentages.
#
# df:     data frame to summarise.
# column: unquoted column name.
# group:  optional unquoted grouping column. It deliberately has no default so
#         that missing() can tell whether the caller supplied it; an explicit
#         `group = NULL` is treated the same as omitting it.
#
# Returns a data frame with a `Percentages` column: one row per value of
# `group` when a grouping column is given, otherwise one row per value of
# `column` (stored under the name `col_name`).
prop_tab <- function(df, column, group) {

  col_name <- enquo(column)
  group_name <- enquo(group)

  # missing() and substitute() inspect the call without evaluating `group`,
  # so a bare column name such as B is never looked up as a variable.
  # substitute() is only reached when the argument was supplied.
  has_group <- !missing(group) && !is.null(substitute(group))

  if (has_group) {
    # a grouping column was supplied: percentage of rows per group
    temp <- df %>%
      select(!!col_name, !!group_name) %>%
      group_by(!!group_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  } else {
    # no grouping column: percentage of rows per distinct value of `column`
    temp <- df %>%
      select(!!col_name) %>%
      group_by(col_name = !!col_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  }

  temp
}

test %>% prop_tab(column = C)  # works
#> # A tibble: 5 x 2
#>   col_name Percentages
#>      <dbl>       <dbl>
#> 1        1          40
#> 2        2          10
#> 3        3          20
#> 4        4          20
#> 5        5          10

test %>% prop_tab(column = A, group = B)
#> # A tibble: 3 x 2
#>       B Percentages
#>   <dbl>       <dbl>
#> 1     1          20
#> 2     2          20
#> 3     3          60

Created on 2018-06-29 by the reprex package (v0.2.0).

Calum You
  • 14,687
  • 4
  • 23
  • 42
2

You can use missing() instead of is.null(), so that your argument is not evaluated (evaluating it is what causes the error):

# Percentage table for `df`: per value of `group` when the caller supplies a
# grouping column, otherwise per distinct value of `column`.
#
# df:     data frame to summarise.
# column: unquoted column name.
# group:  optional unquoted grouping column (default NULL); whether it was
#         supplied is detected with missing(), which does not evaluate it.
prop_tab <- function(df, column, group = NULL) {
  col_quo <- enquo(column)
  group_quo <- enquo(group)

  # Guard clause: nothing supplied for `group`, so tabulate `column` itself.
  if (missing(group)) {
    return(
      df %>%
        select(!!col_quo) %>%
        group_by(col_name = !!col_quo) %>%
        summarise(Percentages = 100 * length(!!col_quo) / nrow(df))
    )
  }

  # A grouping column was supplied: share of all rows falling in each group.
  df %>%
    select(!!col_quo, !!group_quo) %>%
    group_by(!!group_quo) %>%
    summarise(Percentages = 100 * length(!!col_quo) / nrow(df))
}

test %>% prop_tab(column = C) 
# example dataset
# # A tibble: 5 x 2
#   col_name Percentages
#      <dbl>       <dbl>
# 1        1          40
# 2        2          10
# 3        3          20
# 4        4          20
# 5        5          10

test %>% prop_tab(column = A, group = B)
# # A tibble: 3 x 2
#       B Percentages
#   <dbl>       <dbl>
# 1     1          20
# 2     2          20
# 3     3          60

You can also use length(substitute(group)) instead of !missing(group); it is more robust because it won't fail in the unlikely case where someone explicitly passes NULL as the group argument (the !missing(group) version will crash in that case).

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
0

One option would be to test the captured quosure "group_name" instead of evaluating 'group' itself:

# Summarise `df` into a table of row percentages.
#
# df:     data frame to summarise.
# column: unquoted column name.
# group:  optional unquoted grouping column; NULL (the default, or passed
#         explicitly) means "no grouping column".
#
# Returns a data frame with a `Percentages` column: one row per value of
# `group` when a grouping column is given, otherwise one row per value of
# `column` (stored under the name `col_name`).
prop_tab <- function(df, column, group = NULL) {

  col_name <- enquo(column)
  group_name <- enquo(group)

  # Test the captured expression rather than `group` itself, so a bare column
  # name is never evaluated. quo_is_null() replaces as.character() on the
  # quosure, a coercion that rlang has deprecated.
  if (!rlang::quo_is_null(group_name)) {
    # a grouping column was supplied: percentage of rows per group
    temp <- df %>%
      select(!!col_name, !!group_name) %>%
      group_by(!!group_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  } else {
    # no grouping column: percentage of rows per distinct value of `column`
    temp <- df %>%
      select(!!col_name) %>%
      group_by(col_name = !!col_name) %>%
      summarise(Percentages = 100 * length(!!col_name) / nrow(df))

  }

  temp
}

-checking

prop_tab(test, column = C, group = B)
# A tibble: 3 x 2
#      B Percentages
# <dbl>       <dbl>
#1     1          20
#2     2          20
#3     3          60  



prop_tab(test, column = C)
# A tibble: 5 x 2
#  col_name Percentages
#     <dbl>       <dbl>
#1        1          40
#2        2          10
#3        3          20
#4        4          20
#5        5          10
akrun
  • 874,273
  • 37
  • 540
  • 662