This is a rough sketch of what you need:
require 'open-uri'

# Crawls a site starting at a host URL, following root-relative links found
# in <a> tags and collecting one Page per distinct URL fetched.
class Parser
  # Pages collected so far, in the order they were fetched.
  attr_accessor :pages

  def initialize
    @pages = []
  end

  # Fetches +host+ and, recursively, every page linked from it on that host.
  #
  # @param host [String] base URL of the site, e.g. "http://example.com"
  # @return [Array<Page>] every page collected so far
  def fetch_all(host)
    # Drop a trailing slash so that host + "/path" never yields "//path" and
    # absolute links still reduce to a path that starts with "/".
    @host = host.chomp('/')
    fetch(@host)
    pages
  end

  private

  # Fetches and records +url+ unless a page with that URL is already stored.
  def fetch(url)
    return if pages.any? { |page| page.url == url }

    parse_page(url, load_document(url))
  end

  # Downloads +url+ and parses the body as HTML.
  def load_document(url)
    # The block form closes the underlying stream as soon as it has been read.
    Nokogiri::HTML(URI.open(url, &:read))
  end

  # Records a Page for +document+ and then follows each of its links.
  def parse_page(url, document)
    links = extract_links(document)
    pages << Page.new(
      url: url,
      title: document.at_css('title')&.text, # nil when the page has no <title>
      html_content: document.to_html,
      links: links
    )
    # The page is stored before recursing, so link cycles stop in #fetch.
    links.each { |link| fetch(@host + link) }
  end

  # Returns the unique root-relative paths ("/...") linked from +document+.
  def extract_links(document)
    document.css('a').map do |link|
      href = link['href']
      next unless href # anchors without an href attribute

      # Strip the host only when it is the prefix, leaving query strings alone.
      path = href.delete_prefix(@host)
      # "//other.host/x" is protocol-relative and points at another host.
      path if path.start_with?('/') && !path.start_with?('//')
    end.compact.uniq
  end
end
# Plain record for a single page. All four attributes are required keyword
# arguments, are stored exactly as given, and can be read and reassigned.
class Page
  attr_accessor :url
  attr_accessor :title
  attr_accessor :html_content
  attr_accessor :links

  # Raises ArgumentError when any of the four keywords is omitted.
  def initialize(url:, title:, html_content:, links:)
    @url, @title, @html_content, @links = url, title, html_content, links
  end
end