I have a simple script which checks for bad url's:
def self.check_prod_links
require 'net/http'
results = []
Product.find_each(:conditions =>{:published => 1}) do |product|
url = product.url
id = product.id
uri = URI(url)
begin
response = Net::HTTP.get_response(uri)
rescue
begin
http = Net::HTTP.new(uri.host, uri.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
rescue
begin
response = Net::HTTP.get_response("http://" + uri)
rescue => e
p "Problem getting url: #{url} Error Message: #{e.message}"
end
end
end
p "Checking URL = #{url}. ID = #{id}. Response Code = #{response.code}"
unless response.code.to_i == 200
product.update_attribute(:published, 0)
results << product
end
end
return results
end
How can I allow incorrectly formatted urls eg: hkbfksrhf.google.com to not crash the script with the following error:
getaddrinfo: nodename nor servname provided, or not known
I just want the task to run till the end, and print any/all errors that are not a 200 and 301 http response.
Thanks!