0

I'm trying to scrape the page content from the URL https://app.kabuto.com/auth using PhantomJS (my eventual goal is to use the Selenium PhantomJS webdriver from Python). The PhantomJS script below runs without errors, but I still cannot get the page text ('Email', 'Password', etc.). These are form elements and headings that I can see in the page's 'Inspect Element' view, whereas 'View Page Source' contains only JavaScript. I may be making a mistake, as I'm new to PhantomJS and lack knowledge of JavaScript; any help is appreciated!

// PhantomJS script: opens `url`, logs every page/network event, waits for the
// page's client-side JavaScript to render, then prints the rendered text and
// markup of the document.
//
// Written in ES5 (`var`, function expressions) on purpose: PhantomJS's bundled
// WebKit does not understand ES2015+ syntax.
try {
    var url = 'https://app.kabuto.com/auth';
    // The page is built entirely by client-side JavaScript, so the DOM is
    // still empty right after the load event; give the scripts time to run.
    var RENDER_WAIT_MS = 5000;
    var page = require('webpage').create();

    console.log('The default user agent is ' + page.settings.userAgent);
    page.settings.userAgent = "SpecialAgent";

    // Logs each argument of the calling event handler as JSON, one per line.
    function printArgs() {
        var i, ilen;
        for (i = 0, ilen = arguments.length; i < ilen; ++i) {
            console.log("    arguments[" + i + "] = " + JSON.stringify(arguments[i]));
        }
        console.log("");
    }

    page.onInitialized = function() {
        // PhantomJS 1.x lacks Function.prototype.bind, which makes the page's
        // own scripts fail with "'undefined' is not a function (evaluating
        // 'r.bind(this,n)')". Install a minimal polyfill before any page
        // script runs. (It does not support use of the bound function as a
        // constructor, which is enough for ordinary callback binding.)
        page.evaluate(function() {
            if (!Function.prototype.bind) {
                Function.prototype.bind = function(thisArg) {
                    var target = this;
                    var boundArgs = Array.prototype.slice.call(arguments, 1);
                    return function() {
                        var callArgs = Array.prototype.slice.call(arguments);
                        return target.apply(thisArg, boundArgs.concat(callArgs));
                    };
                };
            }
        });
        console.log("page.onInitialized");
        printArgs.apply(this, arguments);
    };
    // Remember the most recent resource failure so that a failed page.open()
    // can report why it failed.
    page.onResourceError = function(resourceError) {
        page.reason = resourceError.errorString;
        page.reason_url = resourceError.url;
    };
    page.onLoadStarted = function() {
        console.log("page.onLoadStarted");
        printArgs.apply(this, arguments);
    };
    page.onLoadFinished = function() {
        console.log("page.onLoadFinished");
        printArgs.apply(this, arguments);
    };
    page.onResourceRequested = function(request) {
        console.log('Request ' + JSON.stringify(request, undefined, 4));
    };
    page.onResourceReceived = function(response) {
        console.log('Receive ' + JSON.stringify(response, undefined, 4));
    };
    // JavaScript errors thrown inside the page.
    page.onError = function(msg) {
        console.log('**some js error**' + msg);
    };
    // Relays console.log() calls made inside the page (including the ones in
    // the page.evaluate() callback below) to PhantomJS's own stdout.
    page.onConsoleMessage = function(msg, lineNum, sourceId) {
        console.log('CONSOLE: ' + msg);
    };

    page.open(url, function (status) {
        if (status !== 'success') {
            console.log( 'Unable to access network' );
            if (page.reason) {
                console.log('    reason: ' + page.reason + ' (' + page.reason_url + ')');
            }
            // Without an explicit exit PhantomJS keeps running forever.
            phantom.exit(1);
            return;
        }

        window.setTimeout(function () {
            // Runs inside the page; output reaches us via onConsoleMessage.
            page.evaluate(function() {
                console.log('innerText is:' + window.document.body.innerText);
                console.log('outerHTML is: ' + document.documentElement.outerHTML);
            });
            phantom.exit();
        }, RENDER_WAIT_MS);
    });
}
catch (e) {
    // Only synchronous setup errors land here; errors raised later inside the
    // callbacks above are not covered by this try block.
    console.log("Error: " + (e && e.message ? e.message : e));
    // The typeof check keeps the message above from being followed by a
    // second ReferenceError if the script is started outside PhantomJS.
    if (typeof phantom !== 'undefined') {
        phantom.exit(1);
    }
}
sara
  • 1
  • 1
  • Waiting `300` milliseconds might be a bit short, because it's a fully dynamic site. Try setting it to 5000. – Artjom B. Aug 26 '15 at 07:18
  • @ArtjomB. Setting the setTimeout delay to 5000 or more produced this error in the page.onError callback: 'undefined' is not a function (evaluating 'r.bind(this,n)'). I noticed the error occurred while executing the script 'https://app.kabuto.com/js/config/config.custom.js', a request that was never made when the delay was 300 ms. – sara Aug 26 '15 at 10:33
  • Thanks @ArtjomB. for such a quick help! That worked! :) – sara Aug 26 '15 at 11:22

0 Answers0