One way to retrieve the rendered HTML is to evaluate JavaScript in the page through chromote:
library(chromote)
library(rvest)
b <- ChromoteSession$new()
# Register the load-event promise *before* navigating. If navigate() is sent
# first, a fast page can fire its load event before loadEventFired() is
# issued, and the subsequent wait would miss it.
page_loaded <- b$Page$loadEventFired(wait_ = FALSE)
b$Page$navigate("https://www.ooir.org/", wait_ = FALSE)
# Block until the promise resolves, i.e. the page's load event has fired.
b$wait_for(page_loaded)
#> $timestamp
#> [1] 73090.44
# Evaluate JavaScript in the Chromote session to get the page's outer HTML as
# a string, then hand that string to rvest for parsing.
js_result <- b$Runtime$evaluate("document.querySelector('html').outerHTML")
page_doc <- read_html(js_result$result$value)
# Show the first few anchor elements found in the rendered page.
head(html_elements(page_doc, "a"))
#> {xml_nodeset (6)}
#> [1] <a href="index.php">\n <div class="float-o1">O</di ...
#> [2] <a href="index.php" class="active_menu">Trending Research</a>
#> [3] <a href="journals.php">Journal Rankings</a>
#> [4] <a href="about.php">About</a>
#> [5] <a href="#" class="clicksmall" onclick="show()"><b>Field of Research</b>: ...
#> [6] <a href="index.php?field=Agricultural+Sciences" class="clicksmall">Agricu ...
You could also work with `b$DOM`; the missing link between that and rvest
looks something like this:
# Same result via the DOM domain: fetch the document, locate the <html> node,
# ask for its outer HTML, and parse that string with rvest.
x <- b$DOM$getDocument()
# querySelector() takes the node id to search from and a CSS selector; the
# first element of its result is passed on as the matched node's id.
html_node <- b$DOM$querySelector(x$root$nodeId, "html")
# The first element of getOuterHTML()'s result is the markup string.
outer_html <- b$DOM$getOuterHTML(html_node[[1]])
dom_doc <- read_html(outer_html[[1]])
# Show the first few anchor elements found in the rendered page.
head(html_elements(dom_doc, "a"))
#> {xml_nodeset (6)}
#> [1] <a href="index.php">\n <div class="float-o1">O</di ...
#> [2] <a href="index.php" class="active_menu">Trending Research</a>
#> [3] <a href="journals.php">Journal Rankings</a>
#> [4] <a href="about.php">About</a>
#> [5] <a href="#" class="clicksmall" onclick="show()"><b>Field of Research</b>: ...
#> [6] <a href="index.php?field=Agricultural+Sciences" class="clicksmall">Agricu ...
Created on 2023-05-27 with reprex v2.0.2