Below is my code; it currently works fine, but I want to optimize it so that it does not load/download certain resources (fonts, images, CSS, JS). I've read the API docs, but I'm not able to find the related configuration options. I'm using WebdriverIO with PhantomJS
as the browser.
'use strict';
var _ = require('lodash');
var webdriverio = require('webdriverio');
var cheerio = require('cheerio');
/**
 * Base class for browser based crawlers.
 * To run this crawler you first need to start phantomJS with webdriver on localhost
 * ```
 * ./phantomjs --webdriver 4444
 * ```
 */
class BaseWebdriverIO {
    /**
     * Constructor
     * @param {Object} [opts] - webdriverio config http://webdriver.io/guide/getstarted/configuration.html
     *   The object passed in is left untouched; missing settings are filled in
     *   on a copy that is stored as `this.opts`.
     */
    constructor(opts) {
        // Merge into a fresh object so the caller's `opts` is never mutated.
        // Values present in `opts` win; the phantomjs capability is only a fallback.
        this.opts = _.defaults({}, opts, {
            desiredCapabilities: {
                browserName: 'phantomjs'
            }
        });
    }

    /**
     * Opens a webdriver session with `this.opts`, navigates to the given url,
     * waits for `body` to become visible and reads its HTML. On success the
     * callback ends the session and hands the HTML to cheerio.
     * @param {string} parseUrl - url to load in the browser
     * @returns {Promise} the webdriverio command chain (described as a Promise
     *   by the original author — confirm against the webdriverio version in use)
     */
    parse(parseUrl) {
        console.log("getting url", parseUrl);
        return webdriverio.remote(this.opts)
            .init()
            .url(parseUrl)
            .waitForVisible('body')
            // Second argument `false` presumably excludes the <body> tag itself
            // from the returned markup — confirm against the getHTML docs.
            // A regular function (not an arrow) is required: `this.end()` relies
            // on the `this` supplied by whoever invokes the callback.
            .getHTML('body', false, function(err, html) {
                if (err) {
                    // NOTE(review): the session is not ended on this path, so a
                    // failed page leaves the browser session open — confirm and
                    // decide whether `end()` should also run here.
                    throw new Error(err);
                }
                this.end();
                return cheerio.load(html);
            });
    }
}
module.exports = BaseWebdriverIO;
I'm not able to find any documentation related to this. Can anyone tell me how I can do that?
Edit/Update: I've found a working example that prevents images from loading by setting phantomjs.cli.args,
from here: https://github.com/angular/protractor/issues/150#issuecomment-128109354 Some basic settings have been configured and work fine; this is the modified desiredCapabilities
settings object:
// Capabilities fragment for the webdriverio options object; the `phantomjs.*`
// keys carry the binary location and the command-line switches for PhantomJS.
desiredCapabilities: {
'browserName': 'phantomjs',
// Executable path exported by the `phantomjs` npm package
'phantomjs.binary.path': require('phantomjs').path,
// Command-line switches handed to the PhantomJS process
'phantomjs.cli.args': [
'--ignore-ssl-errors=true',
'--ssl-protocol=any', // tlsv1
'--web-security=false',
// Skips image downloads — the optimization this snippet was adopted for
'--load-images=false',
//'--debug=false',
//'--webdriver-logfile=webdriver.log',
//'--webdriver-loglevel=DEBUG',
],
// NOTE(review): presumably disables JavaScript execution in the page, which
// would also stop script-rendered content from appearing — confirm this is intended
javascriptEnabled: false,
// NOTE(review): `logLevel` looks like a top-level webdriverio option rather
// than a capability — confirm whether it has any effect in this position
logLevel: 'verbose'
}
As for CSS/font optimization, I found a question raised on Stack Overflow, "How can I control PhantomJS to skip download some kind of resource?", and the solution discussed there is something like this:
/**
 * PhantomJS resource hook that skips stylesheet downloads.
 *
 * A request is treated as a stylesheet when its url contains an http(s)
 * address with `.css` in it, or when the request data carries a
 * `Content-Type` of `text/css`.
 *
 * Kept in ES5 syntax on purpose: this handler runs inside the PhantomJS
 * script context, not in Node.
 *
 * @param {Object} requestData - metadata of the outgoing request (`url` is read here)
 * @param {Object} request - network request handle used to stop the download
 */
page.onResourceRequested = function(requestData, request) {
    var url = requestData['url'];
    // `https?` so that stylesheets served over TLS are skipped as well; no `g`
    // flag, since a single yes/no match is all that is needed.
    var isStylesheet = (/https?:\/\/.+?\.css/i).test(url) ||
        requestData['Content-Type'] === 'text/css';
    // NOTE(review): PhantomJS documents request headers as a `headers` array of
    // { name, value } pairs, so the `Content-Type` property check may never
    // match — confirm against the PhantomJS version in use.

    if (!isStylesheet) {
        return;
    }

    console.log('The url of the request is matching. Aborting: ' + url);
    // PhantomJS documents `abort()` on the network request object; `cancel()`
    // is kept as a fallback for environments that expose only that name.
    if (typeof request.abort === 'function') {
        request.abort();
    } else {
        request.cancel();
    }
};
But I'm not able to trigger this function, i.e. onResourceRequested(), through the desiredCapabilities
object in WebdriverIO's config.

Can anyone tell me how I can call/define this function in my WebdriverIO script capabilities, or in any other way? Thanks.