5

I want to create a function that takes a function and applies it once for every row in a tibble with arguments stored in the correspondingly named columns of the tibble I realize that this sounds a bit odd, but I want the user facing function / functionality be simple.

The processing will take a lot of time in most cases, so I would really prefer to have progress bar functionality, and this is where I found great trouble:

This code works (with no progress bar then):

library(tibble)
library(dplyr)
library(purrr)
library(furrr)
library(tidyr)
library(wrassp)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  return(x*trim*na.rm)
}

xf2 <- function(x,trim,na.rm,ds="ded"){
  return(list("a"=x,"b"=trim))
}

xf3 <- function(x,trim,na.rm,ds="ded"){
  return(data.frame("a"=x,"b"=trim))
}

mymap <- function(f,...){
  plan(multisession)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )

  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)

  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)
  
  exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(allArgsNames)) %>%
    rowwise() %>% 
    mutate(temp = list(future_pmap(.,.f=f,.progress=FALSE))) %>% 
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)
}

mymap(xf,c=20,a=20,ds=1) 
mymap(xf2,c=20,a=20,ds=1)
mymap(xf3,c=20,a=20,ds=1)

This code kind of works (sorry for the extended example, but I want to force a progress bar to be presented):

library(tibble)
library(dplyr)
library(purrr)
library(furrr)
library(tidyr)
library(wrassp)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  return(x*trim*na.rm)
}

mymap <- function(f,...){
  plan(multicore)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )
  
  exDF <- exDF %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) 
  exDF <- exDF   %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF)
  
  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)
  
  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)

  p <- progressr::progressor(steps = nrow(exDF))

  pWrap <- function(fun=f,...){
    
    iDA <- list(...)
    p(message="processing")
    #Sys.sleep(0.1)
    do.call(fun,iDA)
    
  }

 out <- exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(allArgsNames)) %>%
    rowwise() %>% 
    mutate(temp = list(future_pmap(.,.f=pWrap))) %>% 
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)

    return(out)


}

mymap(xf,c=20,a=20,ds=1)

But the progress bar is not displayed if I call the function like that, but only if I call it this way:

with_progress(mymap(xf,c=20,a=20,ds=1))

And, the progress bar appears very quickly, disappears and then the function processes the data for a time and then returns the data.

So the progress bar is not really informative of the overall progression of the function to the user.

I guess it has to do with the dplyr calls being evaluated at the point where a return value is expected ?

But, how do I then force the progress bar to be in sync with that process?

I have tried using just pmap rather than future_pmap to solve the potential issue of the value not being resolved yet, but it seems to not be the issue.

I appreciate all the help I can get on this.

Fredrik Karlsson
  • 485
  • 8
  • 21
  • starting with initial 2 rows `exDF`, why does `mymap` return 4 rows? Is this the expected output? I understand that you want to apply f `rowwise`, so that I would expect 2 rows as output. – Waldi Sep 01 '22 at 07:43

1 Answers1

2
library(tidyverse)
library(furrr)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  Sys.sleep(sample.int(20,1)/10)
  return(x*trim*na.rm)
}

mymap <- function(f,...){
  handlers(global = TRUE)
  
  plan(multisession)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )
  
  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)
  
  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)
  
   
  setupdf <-   exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(c(allArgsNames,"id"))) %>%
    rowwise() 
  
  num_to_do <- nrow(setupdf)
  cat("\nWill be doing ",num_to_do,"\n")
  p <- progressor(num_to_do)

  
  f2 <- function(...){
    result <- f(...)
    p()
    result
  }
    part1 <- setupdf%>% 
    mutate(temp = list(pmap(cur_data(),.f=f2))) 
  
    print("calculationa are complete ")
    
    part1 %>%
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)
}



mymap(xf,c=20,a=20,ds=1) 
Nir Graham
  • 2,567
  • 2
  • 6
  • 10
  • Thanks @Nir Graham, but this solution has the same problem as my original. The function displays the progress bar to the user, but processing continues for a _long_ while after the progress bar has disappeared. So the progress bar really does not reflect the true progress of the function. – Fredrik Karlsson Sep 01 '22 at 11:58
  • Also, the user has to set upp the display of the progress bar him/her self which is not so helpful, as it will then be obvious after a while of processing (too late ?) that the progress bar would actually be very good to have. So, how do I force the progress bar to be displayed when the evaluation actually occurs, and without relying on the user to set it up? – Fredrik Karlsson Sep 01 '22 at 12:03
  • 1
    I think there was a problematic issue with `list(future_pmap(.,.f=f2)` which should rather be `list(future_pmap(cur_data(),.f=f2)`; without this, there was a lot more execution time than was shown as progressed because more calculations were happening than were logged somehow. with_progress is not needed as I had set global handler, you could stuff that into the function definition potentially. but it might be good to let users alter the behaviour. I'll edit my answer to improve it – Nir Graham Sep 01 '22 at 14:05