Crude way (I'll update this in a sec):
ini::read.ini("https://rud.is/dl/example.url")$InternetShortcut$URL
## [1] "https://rud.is/b/2017/11/11/measuring-monitoring-internet-speed-with-r/"
Made slightly less crude:
read_url_shortcut <- function(x) {
require(ini)
x <- ini::read.ini(x)
x[["InternetShortcut"]][["URL"]]
}
Without the ini
package dependency:
read_url_shortcut <- function(x) {
x <- readLines(x)
x <- grep("^URL", x, value=TRUE)
gsub("^URL[[:space:]]*=[[:space:]]*", "", x)
}
More "production-worthy" version:
#' Read in internet shortcuts (.url or .webloc) and extract URL target
#'
#' @param shortcuts character vector of file path+names or web addresses
#' to .url or .webloc files to have URL fields extracted from.
#' @return character vector of URLs
read_shortcut <- function(shortcuts) {
require(ini)
require(xml2)
require(purrr)
purrr::map_chr(shortcuts, ~{
if (!grepl("^http[s]://", .x)) {
.x <- path.expand(.x)
if (!file.exists(.x)) return(NA_character_)
}
if (grepl("\\.url$", .x)) {
.ini <- suppressWarnings(ini::read.ini(.x)) # get encoding issues otherwise
.ini[["InternetShortcut"]][["URL"]][1] # some evidence multiple are supported but not sure so being safe
} else if (grepl("\\.webloc$", .x)) {
.x <- xml2::read_xml(.x)
xml2::xml_text(xml2::xml_find_first(.x, ".//dict/key[contains(., 'URL')]/../string"))[1] # some evidence multiple are supported but not sure so being safe
} else {
NA_character_
}
})
}
Ideally, such a function would return a single data frame row with all relevant info that could be found (title, URL and icon URL, creation/mod dates, etc). I'd rather not keep my Windows VM up long enough to generate sufficient samples to do that.
NOTE: Said "production"-ready version still doesn't gracefully handle edge cases where the file or web address is not readable/reachable nor does it deal with malformed .url
or .webloc
files.