I have a Shiny app where I need to make successive requests to an API. Every request needs the preceding one to be completed, otherwise I get the "not all data was parsed (0 chars were parsed out of a total of 160 chars)" error.
I make requests like fromJSON(content(GET('https://...'), as = "text")) with the httr package, or fromJSON(rawToChar(curl_fetch_memory('https://...')$content)) with the curl package. Unfortunately, when running successive JSON requests, I almost always get the error that not all data was parsed. I'm obliged to pause with Sys.sleep(quite_a_long_time) to prevent the error, but this is of course not optimal.
How could I make sure the data is ready before going on? I know there are callback functions that can be implemented somewhere, but I'm not sure how to do that.
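What I imagine is a small wrapper that only returns once the body actually parses, retrying otherwise. A rough sketch of the idea (get_json_when_ready is a name I made up, and I don't know whether tryCatch around fromJSON is the right way to detect an incomplete body):

library(httr)
library(jsonlite)

get_json_when_ready <- function(url, max_tries = 5, pause = 0.5) {
  for (attempt in seq_len(max_tries)) {
    resp <- GET(url)
    parsed <- tryCatch(
      fromJSON(content(resp, as = "text", encoding = "UTF-8")),
      error = function(e) NULL # the "not all data was parsed" error would land here
    )
    if (!is.null(parsed)) return(parsed)
    Sys.sleep(pause) # wait a bit before retrying
  }
  stop("no parseable JSON after ", max_tries, " attempts: ", url)
}

Is a retry loop like this the usual approach, or is there a proper callback mechanism for this?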
EDIT: I'm using a GET request inside a function that is called in an lapply(). Maybe the problem arises from firing successive new GET requests, as in a for loop? Do I need to use different sessions?
Here is what I tried:
pgkbData <- reactive({
# current dci filtered
# drugs_tmp <- dciCypInput()$x$data$Molécule # dciCypInput holds a datatable of drug names
# drugs_tmp <- dci_all_en %>% # dci_all_en is a dataframe of French-to-English drug name translations
#   filter(fr %in% drugs_tmp) %>%
#   pull(en)
drugs_tmp <- c('acenocoumarol', 'tacrolimus') # for reproducible example
drugs <- stri_trans_general(str = drugs_tmp, id = "Latin-ASCII") # removes accents
drugs <- gsub(" ", "%20", drugs) # Convert white spaces to %20 to make it compatible with URL
genes <- genoCypInput()$x$data$Enzyme
if (length(drugs) == 0 || length(genes) == 0) {
  alert('...')
  return(NULL) # stop here, there is nothing to query
}
drug_id_json <- lapply(drugs, fn_pgkb_drug_id) # defined in separate .R file
drug_ids <- sapply(drug_id_json, function(item) item$data[[1]]$id) # extract the ids
gene_id_json <- lapply(genes, fn_pgkb_gene_id) # find genes IDs
gene_ids <- sapply(gene_id_json, function(item) item$data[[1]]$id) # extract the ids
drugs_genes <- expand_grid(drugs, genes) # all drug-gene combinations
drugsId_genesId <- expand_grid(drug_ids, gene_ids) # all drug-gene ids combinations
final_df <- cbind(drugs_genes, drugsId_genesId)
total <- nrow(final_df)
for (source in c('cpic', 'dpwg')) { # look for CPIC and DPWG guidelines
guidelines_json <- lapply(1:nrow(drugs_genes), # syntax to lapply with multiple arguments
function(i) fn_pgkb_guideline(drug_id = drugsId_genesId$drug_ids[i],
drug = drugs_genes$drugs[i],
gene_id = drugsId_genesId$gene_ids[i],
gene = drugs_genes$genes[i],
source = source,
total = total,
i = i))
guidelines_status <- sapply(guidelines_json,
function(item) item$status) # e.g., 'fail', 'success',...
guidelines_summary <- sapply(guidelines_json,
function(item) item$data[[1]]$summaryMarkdown$html)
guidelines_summary[sapply(guidelines_summary, function(x) length(x) == 0L)] <- NA # replace NULL values with NA to keep a full-length vector (otherwise NULLs are dropped by unlist())
urls <- sapply(guidelines_json, function(item) {
if (length(item$data[[1]]$crossReferences) > 0) {
return(item$data[[1]]$crossReferences[[1]]$resourceId)
} else {
return(NA)
}
})
guidelines_url <- lapply(urls, function(url) {
if (!is.na(url)) {
link_text <- "Voir recommandation"
link_html <- HTML(paste0("<a href='", url, "' target='_blank'>", link_text, "</a>"))
return(link_html)
}
else {
return("")
}
})
final_df <- final_df %>%
cbind(status = guidelines_status,
source = unlist(guidelines_summary),
lien = unlist(guidelines_url))
}
names(final_df)[5:10] <- c("status_CPIC", "CPIC", "lien (CPIC)",
                           "status_DPWG", "DPWG", "lien (DPWG)")
final_df <- final_df %>%
filter(status_CPIC == 'success' | status_DPWG == 'success')
datatable(final_df,
#extensions = c("Buttons"),
rownames = FALSE,
filter = 'top',
selection = "none",
editable = FALSE,
escape = FALSE,
options = list(
# search = list(regex = TRUE, caseInsensitive = TRUE),
pageLength = 20,
dom = 'lftipr'
# buttons = list(list(extend = "copy", text = '<span class="fa-solid fa-copy"></span> Copier', title = ''))
#
)
)
})
Custom methods, defined in a separate .R file:
pgkb_get_prefix <- 'https://api.pharmgkb.org/v1/data/'
pgkb_drugId_url <- function(chemical_name) {
url <- paste0(pgkb_get_prefix, 'chemical?name=', chemical_name)
return(url)
}
fn_pgkb_drug_id <- function(chemical_name) {
  pgkb_drug_id <- content(GET(pgkb_drugId_url(chemical_name))) # reuse the URL helper above
short_notification(duration = 0.8, message = paste0('ID (', chemical_name, ') (1/3)'))
Sys.sleep(0.8)
return(pgkb_drug_id)
}
fn_pgkb_gene_id <- function(hgnc_gene) { # HGNC gene symbol
  pgkb_gene_id <- content(GET(paste0(pgkb_get_prefix,
                                     'gene?symbol=',
                                     hgnc_gene)))
  short_notification(duration = 0.8, message = paste0('ID (', hgnc_gene, ') (2/3)'))
Sys.sleep(0.8)
return(pgkb_gene_id)
}
fn_pgkb_guideline <- function(drug_id, drug, gene_id, gene, source, total, i) {
result <- content(GET(paste0(pgkb_get_prefix,
'guidelineAnnotation?source=',
source,
'&relatedChemicals.accessionId=',
drug_id,
'&relatedGenes.accessionId=',
gene_id)))
Sys.sleep(1)
status <- result$status
short_notification(duration = 1, message = paste0(drug, ' / ', gene, ' (', source, ') '))
short_notification(duration = 1, message = paste0('Status: ', status, ' (', i, '/', total, ')'))
# if (status == "fail") {
# error <- result$data$errors[[1]]$message
# print(paste('error =', error))
# }
return(result)
}
I read in the API documentation that only 2 requests per second are allowed. This is probably why I sometimes get no data back (when more than 2 requests reach the API within one second) and why I have to pause the system. The idea would be to pause for 0.5 seconds once the data from the previous request is available, so that Sys.sleep() runs only for the minimum time necessary.
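For instance, something along these lines (throttled_get is a hypothetical helper of mine, not part of the PharmGKB API; it just enforces a minimum interval measured from the moment the previous response arrived):

library(httr)

last_request_done <- Sys.time()

throttled_get <- function(url, min_interval = 0.5) { # 2 requests/second max
  elapsed <- as.numeric(difftime(Sys.time(), last_request_done, units = "secs"))
  if (elapsed < min_interval) Sys.sleep(min_interval - elapsed)
  resp <- GET(url)
  last_request_done <<- Sys.time() # the response is back: restart the clock
  content(resp)
}

Would replacing the GET() calls in my helper functions with something like this be the right way to respect the rate limit?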