I have a problem with casperjs/phantomjs. I wrote a script to gather all .xls/.xlsx files from a website. That worked. Now I have extendet my script to gather these files from a predefined array of urls. The strange thing I came across is that the download actually works. I wanted to save all files of either one of the sites in a separate folder named accordingly to the website where the file had been gathered from.
Example: All the files of http://minerals.usgs.gov/minerals/pubs/commodity/aluminum/ should be saved in a folder ...\Data\aluminum\
strangely (even though the download works) the script keeps using just one adress from the array (the very last one to be exact). Thus all files are stored in a folder named after the last website used to gather files.
I hope you can understand what I mean. Underneath you find my code...
var links = [];
var index;
var url = {
'abrasives': 'http://minerals.usgs.gov/minerals/pubs/commodity/abrasives/',
'aluminum': 'http://minerals.usgs.gov/minerals/pubs/commodity/aluminum/',
'antimony': 'http://minerals.usgs.gov/minerals/pubs/commodity/antimony/'
};
var casper = require('casper').create();
function getLinks() {
var links = document.querySelectorAll('a');
return Array.prototype.map.call(links, function(e) {
return e.getAttribute('href');
});
}
casper.start('http://google.com', function() {
for(var k in url){
this.thenOpen(url[k], function(){
var j=0;
if(url.hasOwnProperty(k)){
this.echo("Key is " + k + ",value is " + url[k]);
links = this.evaluate(getLinks);
this.echo(links.length + ' Links gefunden...');
for(index= 0; index < links.length;index++){
if ((new RegExp('.xls')).test(links[index]) || (new RegExp('.xlsx')).test(links[index])) {
j++;
var folder = url[k].split('/');
// the next line was for testing purposes
this.echo(folder[6]);
var filename = links[index].replace(/^.*[\\\/]/, '')
this.echo(j + ' Excel-Files found at ' + this.getTitle() + "!");
this.download(links[index],'Data\\' + folder[folder.length] + '\\'+filename);
}
}
}
});
}
});
casper.run(function() {
this.echo('All files stored at C:\\User\\Username\\Data\\ .');
this.echo('End...').exit();
});