I'm trying to set up a basic scraper using request and cheerio. The problem is that I need to GET the websites from different geos to monitor differences in the ads being displayed and the actual content. It was fairly easy to set it up without the proxy part, but when I add a proxy in the request options, it doesn't work (i.e: I get the content based on my actual location).
Just in case, this is totally for educational purposes.
This is the basic code I'm using (I'm a total newbie):
var request = require('request');
var cheerio = require("cheerio");
request = request.defaults({jar: true});
var options = {
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16'
},
proxy: 'http://myusername@gmail.com:mypassword@proxy-provider's-URL'
};
request(options, function () {
request('http://www.the-website-to-scrape.com', function (error, response, body) {
var $ = cheerio.load(body);
$("div").each(function(i, e) {
var result= $(e).find('h3>a').text();
console.log("Result: " + result);
});
});
});
Any ideas on how to make this work? This is my first question ever on stackoverflow and I've only started coding a couple months ago.
Thanks in advance!