8

I am having trouble running jQuery in PhantomJS. I found this answer, which explains that no outer variables are available inside the evaluate function, but that question is about a Node module, and in my example I only call console.log inside the evaluate function. I have also posted this question on GitHub.

Previously, for some pages, the evaluate code below didn't appear to execute at all. Now that @b1f56gd4 has provided some help, it prints messages; the script still doesn't work, but now I can see this:

The page at https://login.yahoo.com/ ran insecure content from http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js.

I can't load jQuery from a different domain, and the --local-to-remote-url-access=true and --web-security=false options make no difference.

I will try to load jQuery locally. Here is the code:

// PhantomJS script: opens the Yahoo login page, tries to load jQuery into it,
// fills the login fields inside page.evaluate(), and renders screenshots.
console.log('Loading a web page');
var url = 'https://login.yahoo.com/'; 
var page = require('webpage').create();
console.log('Setting error handling');
// Echo every message received through page.onConsoleMessage to this script's console.
page.onConsoleMessage = function (msg) {
    console.log(msg);
};
// Log the error message plus the file and line of each trace entry, then exit PhantomJS.
page.onError = function (msg, trace) {
    console.log(msg);
    trace.forEach(function(item) {
        console.log('  ', item.file, ':', item.line);
    })
    phantom.exit();
}
console.log('Error handling is set');
console.log('Opening page');
// The callback logs the load status and, on success, runs the jQuery-loading logic below.
page.open(url, function (status) {
    if (status != 'success') {
        console.log('F-' + status);
    } else {
        console.log('S-' + status); 
        //-------------------------------------------------     
        // Location of the jQuery script handed to page.includeJs() further down.
        var jsLoc = '';
        jsLoc = 'jquery.min.js'; // to load local
        //jsLoc = 'http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js'; // to load remote
        // Fills the login fields inside pg.evaluate() and then renders 'ystsA.png'.
        var func = function(pg){
            console.log('Function called');
            console.log('Page evaluating');
            console.log(pg);
            pg.evaluate(function() {
                console.log('Page evaluate started');               
                //---
                var loginVar = 'ih5d4hf65465fd45h6@yahoo.com.br';
                var pwdVar = 'itsmypass_445f4hd564hd56f46s'; 
                //---
                // NOTE(review): these lines set a `value` property on whatever `$()` returns;
                // if `$` is jQuery, `.val(...)` is presumably what was intended - confirm.
                $("#login_form #username").value = loginVar;
                $("#login_form #passwd").value = pwdVar;
                //---
            });
            console.log('Rendering');
            pg.render('ystsA.png');
            console.log('Rendered');
        }
        // NOTE(review): this check runs in the PhantomJS script itself, outside pg.evaluate(),
        // so it presumably does not tell whether the loaded page already has jQuery - confirm.
        if (typeof jQuery == 'undefined') {  
            console.log('JQuery Loading');  // <<<<==== Execute only until here
            console.log('Source:['+jsLoc+']');
            // `rs` is not referenced again anywhere in this script.
            var rs = page.includeJs(jsLoc, function()  // <<<<===== Fail here, jsLoc was changed to load locally and after tried remotely, i tried use page.injectJs but fail too
            { 
                console.log('JQuery Loaded');  // <<<< ===== Never reach here, no matter if loading local or remote script in include above
                func(page); 
            });
            // Rendered right after includeJs() is called, without waiting for its callback.
            page.render('ystsB.png');
        } else {
            console.log('JQuery Already Loaded');
            func(page);
            page.render('ystsC.png');
        }
        //-------------------------------------------------
    }
    // NOTE(review): reached as soon as the branches above return, whether or not the
    // includeJs callback has run; this may be why 'JQuery Loaded' is never printed - confirm.
    phantom.exit();
});

After reading @g4d564w56's answer, I did everything without jQuery. Now I can fill in the text boxes, but I can't click the button to submit the login form.
See the new code:

// PhantomJS script: opens the Yahoo login page, fills the login fields with plain
// DOM calls inside page.evaluate(), submits the form, and renders a screenshot.
console.log('Loading a web page');
var url = 'https://login.yahoo.com/'; 
var page = require('webpage').create();
console.log('Setting error handling');
// Echo every message received through page.onConsoleMessage to this script's console.
page.onConsoleMessage = function (msg) {
    console.log(msg);
};
// Log the error message plus the file and line of each trace entry, then exit PhantomJS.
page.onError = function (msg, trace) {
    console.log(msg);
    trace.forEach(function(item) {
        console.log('  ', item.file, ':', item.line);
    })
    phantom.exit();
}
console.log('Error handling is set');
console.log('Opening page');
// The callback logs the load status and, on success, runs act01 on the page.
page.open(url, function (status) {
    if (status != 'success') {
        console.log('F-' + status);
    } else {
        console.log('S-' + status); 
        //-------------------------------------------------     
        // jsLoc is assigned here but not referenced again in this script.
        var jsLoc = '';
        jsLoc = 'jquery.min.js'; // to load local
        //jsLoc = 'http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js'; // to load remote      
        // Fills and submits the login form inside pg.evaluate(), then renders 'ystsA.png'.
        var act01 = function(pg){
            console.log('Function called');
            console.log('Page evaluating');
            console.log(pg);
            pg.evaluate(function() {
                // Short alias for document.getElementById.
                var getElmById = function(id){
                    return document.getElementById(id);
                }           
                console.log('Page evaluate started');               
               //---
                var loginVar = 'ih5d4hf65465fd45h6@yahoo.com.br';
                var pwdVar = 'itsmypass_445f4hd564hd56f46s'; 
                //---
                getElmById("username").value = loginVar;
                getElmById("passwd").value = pwdVar;
                getElmById("login_form").submit(); /// <<<<==== now its dont work !!!
                //---
            });
            console.log('Rendering');
            // NOTE(review): the screenshot is taken immediately after evaluate() returns;
            // nothing here waits for the page that the submit() above may load - confirm.
            pg.render('ystsA.png');
            console.log('Rendered');
        }
        act01(page);
        //-------------------------------------------------
    }
    // NOTE(review): the script exits right after act01() returns, presumably before the
    // submitted form's response can load - confirm.
    phantom.exit();
});
Community
  • 1
  • 1
newway
  • 647
  • 1
  • 13
  • 21
  • The solution is here: http://stackoverflow.com/questions/11121734/evaluate-doesnt-work/11121792#11121792 –  Apr 06 '13 at 15:35
  • @b1f56gd4 Thanks a lot, b1f56gd4. The problem was that PhantomJS doesn't print console.log output from inside the evaluate block by default; the page above shows the trick. Now I can see a lot of "The page at ran insecure content from " and "Unsafe JavaScript attempt to access frame with URL from frame with URL Domains, protocols and ports must match." messages. Thanks, now I can see what is happening and debug it. – newway Apr 06 '13 at 15:51
  • I know this question is a year old, but for those finding this question by searching from google the problem in this particular case is that an http resource is being used in an https page. In order to load jquery all you have to do is use an https url. – derickito May 15 '14 at 23:55

5 Answers5

5

I know this question was already answered about a year ago, but the answer didn't really address the issue. The reason for the error below:

"The page at https://login.yahoo.com/ ran insecure content from http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js."

is that the login page is an https page and you're trying to load an http resource. If you change the URL to https://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js, this error will go away. It took me a while to figure that out.

joe_young
  • 4,107
  • 2
  • 27
  • 38
derickito
  • 695
  • 7
  • 25
4

A working version using google search.

/**
 * Google search walkthrough driven by page-load events.
 *
 * Every finished page load includes jQuery into the page and then runs the next
 * step: the first load submits the query, the following load prints the results
 * and exits. `phantom.state` records that the search has already been submitted.
 */
var page, doSearch, displayResults;
page = require('webpage').create();

/**
 * Step 1: types the query into the search box and submits the form from inside
 * the page, then saves a screenshot to 'phantomjs-searching.png'.
 */
doSearch = function() {
    console.log('Searching...');
    page.evaluate(function() {
        $("input[name=q]").val('what is phantomjs');
        $("form").trigger('submit');
        return true;
    });
    page.render('phantomjs-searching.png');
};

/**
 * Step 2: logs the index, text and href of every `h3 a` element of the page,
 * then saves a screenshot to 'phantomjs-results.png'.
 */
displayResults = function() {
    console.log('Results...');
    page.evaluate(function() {
        $('h3 a').each(function(i) {
            console.log([i + 1, $(this).text(), ' // ' + $(this).attr('href')].join(': '));
        });
        return true;
    });
    page.render('phantomjs-results.png');
};

/**
 * Runs after each page load: includes jQuery, then dispatches to the step that
 * matches `phantom.state`. A failed load ends the script.
 */
page.onLoadFinished = function(status) {
    if (status === 'success') {
        // jQuery is fetched over HTTPS on purpose: an https page reports an http
        // script as insecure content, and in that case this callback never runs.
        page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js', function() {
            if (!phantom.state) {
                doSearch();
                phantom.state = 'results';
            } else {
                displayResults();
                phantom.exit();
            }
        });
    } else {
        console.log('Connection failed.');
        phantom.exit();
    }
};

// Forward console output produced inside the page to this script's console.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

page.open('http://google.com');
Gil Barbara
  • 66
  • 2
  • 5
2

Try the next code from http://snippets.aktagon.com/snippets/534-How-to-scrape-web-pages-with-PhantomJS-and-jQuery. It loads a local copy of jQuery, but can also use the jQuery instance loaded by the requested page.

/**
 * Two-step scraper: submit a search form, then list the links of the result page.
 *
 * Steps are driven by page.onLoadFinished, which runs for every finished page
 * load, so the second step is executed when the page produced by the form
 * submission has loaded. (A callback passed to page.open() cannot be relied on
 * to run again for later navigations.)
 */
var page = require('webpage').create(),
    url = 'http://localhost/a-search-form',
    stepIndex = 0,
    // Step to run on the next finished page load; each step names its successor.
    nextStep = initialize,
    // Upper bound for the whole run, so a load that never finishes cannot hang the script.
    TIMEOUT_MS = 30000;

/**
 * From PhantomJS documentation:
 * This callback is invoked when there is a JavaScript console. The callback may accept up to three arguments:
 * the string for the message, the line number, and the source identifier.
 */
page.onConsoleMessage = function (msg, line, source) {
  console.log('console> ' + msg);
};

/**
 * From PhantomJS documentation:
 * This callback is invoked when there is a JavaScript alert. The only argument passed to the callback is the string for the message.
 */
page.onAlert = function (msg) {
  console.log('alert!!> ' + msg);
};

/**
 * Runs the pending step each time a page has finished loading.
 *
 * @param {string} status - 'success' when the page loaded, anything else otherwise.
 */
page.onLoadFinished = function (status) {
  var isLastStep;

  if (status !== 'success') {
    console.log('Failed to load page (status: ' + status + ')');
    phantom.exit(1);
    return;
  }
  if (!nextStep) {
    // Every step has already run; ignore any further load events.
    return;
  }

  console.log('============================================');
  console.log('Step "' + stepIndex + '"');
  console.log('============================================');

  // Inject jQuery for scraping (you need to save jquery-1.6.1.min.js in the same folder as this file)
  if (!page.injectJs('jquery-1.6.1.min.js')) {
    console.log('Could not inject jquery-1.6.1.min.js');
    phantom.exit(1);
    return;
  }

  isLastStep = nextStep();

  // Save screenshot for debugging purposes (taken before exiting, so the last step gets one too)
  page.render("step" + stepIndex++ + ".png");

  if (isLastStep) {
    phantom.exit();
  }
};

/**
 * Step 1: fills in and submits the search form.
 *
 * @returns {boolean} false, because another step follows.
 */
function initialize() {
  page.evaluate(function() {
    $('form#search input.query').val('Jebus saves');
    $('form#search').submit();
    console.log('Searching...');
  });
  // The result page triggers another load event; handle it with the next step
  nextStep = parseResults;
  return false;
}

/**
 * Step 2: logs the href of every link inside #search-result.
 *
 * @returns {boolean} true, because this is the last step.
 */
function parseResults() {
  page.evaluate(function() {
    $('#search-result a').each(function(index, link) {
      console.log($(link).attr('href'));
    });
    console.log('Parsed results');
  });
  // If there was a 3rd step we could point nextStep to another function and return false
  nextStep = null;
  return true;
}

// Watchdog: give up instead of waiting forever for a page load that never comes.
setTimeout(function () {
  console.log('Timed out after ' + TIMEOUT_MS + ' ms');
  phantom.exit(1);
}, TIMEOUT_MS);

page.open(url);
lmeurs
  • 16,111
  • 4
  • 27
  • 30
  • Thanks a lot for your time and help. I made minimal changes to make it executable on a real site (maybe I broke something while doing so), but when I run the code it never returns: it runs for hours without finishing, and only one screenshot (the first) is taken. Anyway, thanks a lot. – newway Jul 07 '13 at 18:51
1

There is a well-known bug where PhantomJS can't load jQuery, so it will be hard to post form data to the server, but you can select elements using only querySelectorAll, as in this example: how to scrape links with phantomjs

Community
  • 1
  • 1
g4d564w56
  • 35
  • 1
  • Thanks for this information, on my searches never found this information about this bug, never imagine PhantomJS cant load JQuery because its a general headless browser. Thinking about come back to HtmlUnit – newway Apr 11 '13 at 11:55
  • 6
    This is NOT true; PhantomJS can load jQuery just fine, as per the examples at https://github.com/ariya/phantomjs/blob/master/examples/phantomwebintro.js and http://phantomjs.org/page-automation.html. Sometimes `page.includeJs(url_to_jquery, function() { ... }` doesn't work for me, and so I use `page.injectJs("./jquery.min.js")` instead :-). – elimisteve Jul 09 '14 at 03:56
  • @elimisteve If I copy an paste the page-automation sample into a file and run `$ phantomjs sample.phantomjs`. I get the "Unsafe JavaScript attempt to access frame with URL about:blank from frame with URL" message. Thanks for your suggestion about `page.injectJs`. – Tobias Jan 30 '15 at 16:10
1

@lmeurs's answer is very good, but it is not functional.
I used that answer to create something functional for you :).

/**
 * Two-step Yahoo search: step 0 submits a query, step 1 lists result links.
 *
 * One step runs per finished page load (page.onLoadFinished), so step 1 runs
 * against the page that the submitted form loads rather than immediately after
 * step 0 on the still-unchanged start page.
 */
var page = require('webpage').create();
var url = 'http://br.search.yahoo.com';
var stepIndex = 0;
// Steps in execution order; the script exits after the last one.
var steps = [step0, step1];
// Upper bound for the whole run, so a load that never finishes cannot hang the script.
var TIMEOUT_MS = 30000;

// Forward console output and alerts raised inside the page to this script's console.
page.onConsoleMessage = function (msg, line, source) { console.log('console> ' + msg); };

page.onAlert = function (msg) { console.log('alert!!> ' + msg); };

/** Saves a screenshot of the current page as "step<stepIndex>.png". */
function takeShot(){
    console.log("TakingShot");
    page.render("step" + stepIndex + ".png");
    console.log("ShotTake");
}

/** Step 0: fills the text field(s) inside a form and submits the owning form. */
function step0() {
    console.log("step 00 enter");
    page.evaluate(function() {
        var field = $("form [type='text']");
        field.val('its now sunday searching it');
        // Submit the form itself: triggering submit on the button element fires
        // the event but does not perform the form submission.
        field.first().closest('form').submit();
    });
    console.log("step 00 exit");
}

/**
 * Step 1: logs the href of every link inside #search-result.
 * NOTE(review): this selector is taken over unchanged; verify that it matches
 * the markup of the Yahoo result page.
 */
function step1() {
    console.log("step 01 enter");
    page.evaluate(function() {
        $('#search-result a').each(function(index, link) {
            console.log($(link).attr('href'));
        });
    });
    console.log("step 01 exit");
}

/**
 * Runs the step for the page that has just finished loading, takes a
 * screenshot, and exits once the last step is done.
 *
 * @param {string} status - 'success' when the page loaded, anything else otherwise.
 */
page.onLoadFinished = function (status) {
    if (stepIndex === 0) {
        console.log("[- STARTING -]");
    }
    if (status !== 'success') {
        console.log("Failed to load page (status: " + status + ")");
        phantom.exit(1);
        return;
    }
    if (stepIndex >= steps.length) {
        // Every step has already run; ignore any further load events.
        return;
    }
    if (!page.injectJs('jquery-1.6.1.min.js')) {
        console.log("Could not inject jquery-1.6.1.min.js");
        phantom.exit(1);
        return;
    }

    console.log("Step["+stepIndex+"] starting on ["+new Date()+"]");
    steps[stepIndex]();
    takeShot();
    stepIndex++;

    if (stepIndex === steps.length) {
        phantom.exit();
    }
};

// Watchdog: give up instead of waiting forever for a page load that never comes.
setTimeout(function () {
    console.log("Timed out after " + TIMEOUT_MS + " ms");
    phantom.exit(1);
}, TIMEOUT_MS);

page.open(url);
Artjom B.
  • 61,146
  • 24
  • 125
  • 222
  • 1
    Thanks a lot for your time and help. I ran your code as is; I didn't change anything. The only thing I did was put the [jquery-1.6.1.min.js] file in the same folder as your script. It ran OK and returned. It takes a screenshot of the Yahoo search home page as expected in step 1, but it doesn't print the list of result links expected from step 2; in step 2 it takes a screenshot of the Yahoo search home page again, so it looks like the search argument is not being posted to the Yahoo form. Anyway, thanks a lot. – newway Jul 07 '13 at 19:01