I faced a similar issue, and the only alternative I could find was to use Python's subprocess module
together with PhantomJS.
Here is the Python code:
import json
import subprocess

# Run the PhantomJS script and capture its stdout, which is expected to be a
# JSON array of the URLs requested while loading main_url.
output = subprocess.check_output(['phantomjs', 'getResources.js', main_url])
urls = json.loads(output)
for url in urls:
    # filter and process URLs
    pass
And here is the content of the JavaScript file:
// getResources.js
// Usage:
//   phantomjs getResources.js your_url
//
// Opens your_url in a PhantomJS page, records the URL of every resource the
// page requests, and prints the collected URLs to stdout as one JSON array.
// Written in ES5 on purpose: this runs inside PhantomJS, not Node.
var page = require('webpage').create();
var system = require('system');

// How long to keep collecting requests after the load-finished callback
// before printing the result and exiting.
var SETTLE_DELAY_MS = 16000;

var urls = [];
var reportScheduled = false;

if (system.args.length < 2) {
    // No URL given: report on stderr so stdout never carries non-JSON text.
    system.stderr.writeLine('Usage: phantomjs getResources.js your_url');
    phantom.exit(1);
} else {
    // Record every outgoing resource request.
    page.onResourceRequested = function(request, networkRequest) {
        urls.push(request.url);
    };

    // After the load-finished callback, wait for late requests, then emit
    // the collected URLs as JSON and quit.
    page.onLoadFinished = function(status) {
        // Guard so the report is scheduled only once even if the callback
        // is invoked again.
        if (reportScheduled) {
            return;
        }
        reportScheduled = true;
        setTimeout(function() {
            console.log(JSON.stringify(urls));
            phantom.exit();
        }, SETTLE_DELAY_MS);
    };

    // Deliberately ignore resource and page script errors: the only thing
    // written to stdout should be the final JSON array.
    page.onResourceError = function() {
        return false;
    };
    page.onError = function() {
        return false;
    };

    page.open(system.args[1]);
}
PhantomJS supports various options as well; for example, to change the user agent you can use something like this:
// Override the user-agent string on the page's settings; the value shown is
// abbreviated ("...") and should be replaced with a complete string.
// NOTE(review): presumably this must be set before page.open() is called — confirm.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) ...';
This is a simplified version of this answer which I used for my issue.