I am building a script using Mechanize to scrape data from a website. The script is supposed to click on the "Read biography" link and then scrape the biography of the member on the next page.
Here is the script in the Rake file:
require 'mechanize'
require 'date'
require 'json'
# Rake task: visit the first two member profile links found on the
# congress.gov members index, scrape a few fields from each profile plus the
# linked biography page, and print the results as pretty-printed JSON.
#
# Each member is emitted as a hash with:
#   name        - <title> text before the first "|", stripped
#   institution - text of the 'td~ td+ td' cells before the first ":", stripped
#   dob         - characters 1..4 of the stripped '.birthdate' text
#                 (presumably the year after one leading character; confirm
#                 against the page markup), or nil when that text is empty
#   bio         - stripped text of every <p> on the page behind the
#                 "Read biography" link, or nil when the profile has no such link
task :testing2 do
  agent = Mechanize.new
  page = agent.get('https://www.congress.gov/members')

  # Only the first two matching links are followed.
  member_links = page.links_with(href: %r{.*/member/\w+}).first(2)

  members = member_links.map do |link|
    member = link.click

    # `to_s` guards against `split` returning an empty array (nil first
    # element) when the selector matched no text.
    name = member.search('title').text.split('|').first.to_s.strip
    institution = member.search('td~ td+ td').text.split(':').first.to_s.strip
    dob = member.search('.birthdate').text.strip[1..4]

    # Follow the biography link and keep the page it returns. The page must be
    # stored under the same name that is read on the next line; `link_with`
    # returns nil when nothing matches, so guard before clicking.
    bio_link = member.link_with(text: 'Read biography')
    bio_page = bio_link.click if bio_link
    bio = bio_page.search('p').text.strip if bio_page

    {
      name: name,
      institution: institution,
      dob: dob,
      bio: bio
    }
  end

  puts JSON.pretty_generate(members)
end