
I'm newish to R (and programming in general) and am automating myself out of a job ;)

I have written a script that (1) takes a CSV file of "API numbers," (2) finds and downloads an HTML table for each API number, and (3) saves the info as a CSV table. It works - it's just not pretty. One of the problems is that the website I'm downloading the data from sometimes returns a 500 Internal Server Error. To work around the site's sporadic availability, I have built a really ugly chain of if statements that delay the script for increasing amounts of time before retrying. It's overkill, but I don't want the download to fail when I leave it running overnight.

I'm looking for feedback on the workaround download delay. Is there a better way to do this? Is there a way to tell R to keep retrying the download until it succeeds? (I've sketched one idea at the end of this post, but I haven't tested it.)

The script downloads the data and saves a separate CSV for each API number. The example list has 60 API numbers; you can find it here: https://www.dropbox.com/s/fwvcxun8hr0xy4n/API%20List.csv?dl=0

Thanks in advance!

######################### User-Defined Parameters ##########################################
### Specify where the API list is and where to download temp data

welllist = ".../API List.csv" # each API will have a separate CSV in this directory as well
tempdata = ".../tempdata.txt"


######################### Get API List and Parse API ##########################################

wells = read.csv(file = welllist, header = 1, sep = ",")

colnum = 1
rownum = nrow(wells)
API = data.frame(1:rownum,"A","B","C",stringsAsFactors = F)
colnames(API) = c("number", "type","county","sequence")

for (i in 1:rownum) {
  current = toString(wells[i,colnum])

  dashloc = as.data.frame(gregexpr(pattern = "-", text = current))

  type = substr(x = current, start = 0, stop = dashloc[1,1]-1)
  if (type != "05") {print(paste("WARNING! API DOES NOT BEGIN WITH 05", "- WELL", i,wells[i,2]))}
  county = substr(x = current, start = dashloc[1,1]+1, stop = dashloc[2,1]-1)
  sequence = substr(x = current, start = dashloc[2,1]+1, stop = nchar(current))
  API$type[i] = type
  API$county[i] = county
  API$sequence[i] = sequence
}
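
(Side note: I suspect strsplit would be a tidier way to pull the three dash-separated pieces apart. This is a rough, untested sketch of what I mean - it assumes every entry has exactly two dashes like 05-123-45678, and API2 is just a hypothetical replacement for the data frame built by the loop above:)

# Rough sketch: split each API number on "-" instead of using gregexpr/substr
parts = strsplit(as.character(wells[[colnum]]), "-", fixed = TRUE)
API2 = data.frame(number   = seq_along(parts),
                  type     = sapply(parts, `[`, 1),
                  county   = sapply(parts, `[`, 2),
                  sequence = sapply(parts, `[`, 3),
                  stringsAsFactors = FALSE)
# same sanity check as the warning in the loop
if (any(API2$type != "05")) print(paste("WARNING! API DOES NOT BEGIN WITH 05 - WELL", which(API2$type != "05")))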

######################### Download the Data ##########################################

end = nrow(API) 
for (i in 1:end) {
  county = API$county[i]
  sequence = API$sequence[i]

  dataurl = paste("http://cogcc.state.co.us/production/?&apiCounty=",county,"&apiSequence=",sequence,sep = "")

  ### ***** U-G-L-Y Retry Data Download if Server Error or if File Size is Too Small ***** ###
  err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb")) 
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(2)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(4)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(8)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(16)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(32)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(64)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(128)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(256)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(512)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    Sys.sleep(1024)
    err <- try(download.file(url = dataurl, destfile = tempdata, quiet = F, mode = "wb"))
  }
  if (class(err) == "try-error" || file.size(tempdata) < 300000) {
    # append to the error log so earlier failures aren't overwritten
    cat(paste("Error downloading", sequence, "at", Sys.time()),
        file = paste(dirname(welllist), "errorlog.txt", sep = "/"), sep = "\n", append = TRUE)
    next
  }
  ### Save the CSV ###

  # save the downloaded data under its own per-well file name (tempdata holds the raw download)
  file.copy(from = tempdata, to = paste(dirname(welllist), "/", sequence, "_production.csv", sep = ""), overwrite = TRUE)
}

Periodically, the website breaks and gives: HTTP status was '500 Internal Server Error'
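
What I'm imagining as a replacement for the chain of if statements is a small helper that keeps retrying with a doubling delay, roughly like this (untested sketch; download_with_retry is just a name I made up, and it reuses the same 300000-byte size check and 2...1024-second delays as above):

# Untested sketch: retry download.file with doubling delays until it succeeds or gives up
download_with_retry = function(url, destfile, max_tries = 11, first_delay = 2, min_size = 300000) {
  delay = first_delay
  for (attempt in 1:max_tries) {
    err = try(download.file(url = url, destfile = destfile, quiet = F, mode = "wb"))
    if (!inherits(err, "try-error") && file.size(destfile) >= min_size) {
      return(TRUE)                       # download looks complete
    }
    if (attempt < max_tries) {
      Sys.sleep(delay)                   # wait before the next attempt
      delay = delay * 2                  # 2, 4, 8, ... seconds, like the if chain
    }
  }
  return(FALSE)                          # every attempt failed
}

# The body of the download loop would then shrink to something like:
# if (!download_with_retry(dataurl, tempdata)) { log the error and `next` }

I've also seen httr mentioned (httr::GET plus httr::status_code) as a way to check for the 500 directly instead of guessing from file size, but I haven't tried it yet.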
