I am using PhantomJS to scrape some websites and then extract the information with R. I am following this tutorial. Everything works fine for a single page, but I couldn't find a simple tutorial on how to automate this for multiple pages. My attempt so far:
var countries = ["Albania", "Afghanistan"];
var len = countries.length;
var name1 = ".html";
var add1 = "http://www.kluwerarbitration.com/CommonUI/BITs.aspx?country=";
var country = "";
var name = "";
var add = "";

for (i = 1; i <= len; i++) {
    country = countries[i];
    name = country.concat(name1);
    add = add1.concat(name1);

    var webPage = require('webpage');
    var page = webPage.create();
    var fs = require('fs');
    var path = name;

    page.open(add, function (status) {
        var content = page.content;
        fs.write(path, content, 'w');
        phantom.exit();
    });
}
I don't get any error when running the code, but the script creates an HTML file only for the second country, and that file contains all of the information on the page except the small table I am interested in.
I tried to gather information from similar questions, but since I couldn't find a simple reproducible example, I still don't understand what I am doing wrong.
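For clarity, this is roughly the sequential flow I am aiming for: load one country's page, save it, then move on to the next, and call phantom.exit() only after the last one. The sketch below assumes the URL should be the base address plus the country name, and that each result should be saved to "<country>.html":

var webPage = require('webpage');
var fs = require('fs');

var countries = ["Albania", "Afghanistan"];
var baseUrl = "http://www.kluwerarbitration.com/CommonUI/BITs.aspx?country=";

function scrapeCountry(index) {
    // All countries done: exit PhantomJS only now.
    if (index >= countries.length) {
        phantom.exit();
        return;
    }

    var country = countries[index];
    var page = webPage.create();

    page.open(baseUrl + country, function (status) {
        if (status === 'success') {
            // Save the rendered HTML for this country.
            fs.write(country + ".html", page.content, 'w');
        }
        page.close();
        // Only start the next country once this page has finished.
        scrapeCountry(index + 1);
    });
}

scrapeCountry(0);

If the small table is filled in by JavaScript after the initial page load, it may also be necessary to wait briefly (for example with window.setTimeout) before reading page.content.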