A slight expansion on Dave2e's solution, demonstrating how to get the XHR JSON resource with splashr:
library(splashr) # devtools::install_github("hrbrmstr/splashr")
library(tidyverse)
splashr requires a Splash server, and the package provides a way to start one with Docker. Read the help on the GitHub page and inside the package to find out how to use that.
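If you haven't pulled the Splash Docker image yet, splashr has a helper for that; a minimal sketch, assuming Docker is installed and running locally:
install_splash() # one-time setup: pulls the Splash Docker image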
vm <- start_splash()
URL <- "http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38"
This retrieves all the resources loaded by the page:
splash_local %>% render_har(URL) -> resources # get ALL the items the page loads
stop_splash(vm) # we don't need the splash server anymore
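For the initial hunting, one approach is to dump every request URL the page made and eyeball the list for API-looking endpoints; a quick sketch using the same HAR accessor as below:
map_chr(resources$log$entries, c("request", "url")) %>% head(20) # first 20 request URLs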
This targets the background XHR resource with catalogName in it. You'd still need to hunt to find this initially, but once you know the pattern, it becomes a generic operation for other grid points.
map_chr(resources$log$entries, c("request", "url")) %>%
grep("catalogName", ., value=TRUE) -> files_json
files_json
## [1] "http://www.geostore.com/environment-agency/rest/product/OS_GB_10KM/TQ38?catalogName=Survey"
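As an aside, once that endpoint pattern is known you can skip the Splash round trip for other grid squares and hit it directly; a sketch, assuming the service keeps the same URL shape for other OS 10km grid references (the function name is mine):
get_tile_catalog <- function(grid_ref) {
  jsonlite::fromJSON(
    sprintf("http://www.geostore.com/environment-agency/rest/product/OS_GB_10KM/%s?catalogName=Survey", grid_ref)
  )
}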
Read that in:
guids <- jsonlite::fromJSON(files_json)
glimpse(guids)
## Observations: 98
## Variables: 12
## $ id <int> 170653, 170659, 170560, 170565, 178307, 178189, 201556, 238...
## $ guid <chr> "54595a8c-b267-11e6-93d3-9457a5578ca0", "63176082-b267-11e6...
## $ pyramid <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA", "LIDAR-DSM-1M-ENGLAND-2003-...
## $ tileReference <chr> "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ...
## $ fileName <chr> "LIDAR-DSM-1M-2003-TQ3580.zip", "LIDAR-DSM-1M-2003-TQ3585.z...
## $ coverageLayer <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA-MD-YY", "LIDAR-DSM-1M-ENGLAND...
## $ fileSize <int> 76177943, 52109669, 59326278, 18048623, 13204420, 11919071,...
## $ descriptiveName <chr> "LIDAR Tiles DSM at 1m spatial resolution 2003", "LIDAR Til...
## $ description <chr> "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m",...
## $ groupName <chr> "LIDAR-DSM-TIMESTAMPED-ENGLAND-2003-EA", "LIDAR-DSM-TIMESTA...
## $ displayOrder <int> -100, -100, -100, -100, -100, -100, -100, -100, -100, -100,...
## $ metaDataUrl <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "https://data.g...
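If you only want a subset of the 98 tiles, the columns above make it easy to filter before building download URLs; for example (a sketch, using a pyramid value taken from the glimpse output — swap this in for guids in the steps below if you use it):
dsm_2003 <- filter(guids, pyramid == "LIDAR-DSM-1M-ENGLAND-2003-EA") # keep only the 2003 1m DSM tiles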
The rest is similar to the other answer:
dl_base <- "http://www.geostore.com/environment-agency/rest/product/download"
urls <- sprintf("%s/%s", dl_base, guids$guid)
Be kind to your network and their server:
walk2(urls, guids$fileName, download.file)
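If you want to be even gentler, here's a variant that pauses between files (the 5 second delay is an arbitrary choice; mode = "wb" keeps the zips intact on Windows):
walk2(urls, guids$fileName, ~{
  download.file(.x, .y, mode = "wb") # zips are binary
  Sys.sleep(5)                       # small pause between requests
})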
Do this only if you think your system and their server can handle 98 simultaneous 70-100 MB file downloads:
download.file(urls, guids$fileName, method = "libcurl") # vectorised downloads need the libcurl method
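Either way, you can sanity-check the results afterwards against the fileSize field the API reported; a quick sketch:
all(file.size(guids$fileName) == guids$fileSize) # TRUE if every download completed in full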