1

I need to select two elements (two of the five tabs on a web page), click on them, and scrape the tables that are returned. Each tab is a 'li' whose child 'span' carries the onclick event. I always get the list of tabs with the correct number of entries, but only the first element is not null, and even on the first element I can't trigger the onclick event to show its tables. Also, the 'id' of the element that carries the onclick event is random.

I'm using PhantomJS 2.1.1 and CasperJS 1.1.4.

JS code:

// Wait to be redirected to the Home page, take a screenshot of it, then
// try to click every dashboard tab and take a screenshot of each one.
casper.then(function(){
    casper.wait(5000, function(){
        this.capture('home.png');
        // Select the tab labels from inside the page.
        // NOTE(review): per the comments on this question, DOM elements are
        // not reliably serialized when returned out of evaluate(); that would
        // explain the null entries reported below - confirm.
        var tabs = casper.evaluate(function() {
            //return document.querySelectorAll('[id^="dbTabLabel_"]'); //Another selector option
            return document.querySelectorAll('span.dashTitle');
        });
        console.log('Num Tabs: ' + tabs.length);

        for(i = 0; i < tabs.length; i++) {
            if(tabs[i]) {
                console.log('Form exists');
                console.log('form id: ' + tabs[i].id);
                // create a mouse click event
                var event = document.createEvent( 'MouseEvents' );
                event.initMouseEvent( 'click', true, true, window, 1, 0, 0 );
                // send click to element
                tabs[i].dispatchEvent( event );
                // Screenshot file name is derived from the tab's visible text.
                var name = tabs[i].innerText + '.png';
                casper.wait(2000, function(){ // Wait to load completely
                    this.capture(name); //Make a screenshot for each tab
                });
            } else {
                console.log("Null Tab");
            }
        }
    });
});

The output:

Num Tabs: 5
Form exists
form id: dbTabLabel_820718256523832
Null Tab
Null Tab
Null Tab
Null Tab

I want to scrape this page (the HTML was saved with Casper's getHTML function after logging in). A screenshot of the page is here. This is the fragment corresponding to the tabs:

<!--TEMPLATES-->
    <ul id="tabul">
        <li id="litab" class="ntabs add"><a href="" id="addtab" class="osx">+</a></li>
        <li id="litab" class="add rightAlign setting-item">
            <img src="/Content/images/icons/expand-24x24.png" class="out-triggerer gray" onclick="fullScreen()">            
        </li>        
        <li id="default-report-export" class="rightAlign">
            <a href="/report/defaultExport" download="">
                <input type="image" src="/Content/images/icons/excel.gif" value="Excel" title="Export default report">
            </a>
        </li>
        <li id="default-report-export" class="rightAlign">
            <a href="/report/defaultExport?isPdf=true" download="">
                <input type="image" src="/Content/images/export-pdf-24x24.png" value="Excel" title="Export default report">
            </a>
        </li>
        <li id="dbTab_889113733777776" class="ntabs addedTab activeTab">
            <span id="dbTabLabel_889113733777776" class="dashTitle" onclick="clickDashboard('889113733777776')">Dashboard EUR</span>
            <span id="dbTabSettings_889113733777776" class="settingsContainer dashSettings" style="">
                <div id="topnav" class="topnav">
                    <a href="javascript:void(0)" class="signin" onclick="toggleTabSettingsMenu('889113733777776',true);">
                        <span><img src="/Content/Images/icon_gear.png" alt="Edit"></span>
                    </a>
                </div>
                <fieldset id="dbTabSettingsMenu_889113733777776" class="dashSettings-menu">
                    <ul class="dashboardEditMenu">
                        <img src="/Content/images/close.png" onclick="toggleTabSettingsMenu('889113733777776',false);" alt="tooltip" style="position:absolute;right:2px;top:2px;border:0;">
                        <li class="dashboardEditMenuList">
                            <a href="javascript:void(0)" class="addWidget" onclick="toggleLeftUpdatePanelMenu(true);"> Añadir widgets</a>
                        </li>
                        <li class="dashboardEditMenuList">
                             <a href="javascript:void(0)" class="closeDash" onclick="deleteDashboard('889113733777776')"> Borrar este dashboard</a>
                        </li>
                    </ul>
                </fieldset>
            </span>
        </li>
        <li id="dbTab_894967889413237" class="ntabs addedTab">
            <span id="dbTabLabel_894967889413237" class="dashTitle" onclick="clickDashboard('894967889413237')">Dashboard USD</span>
            <span id="dbTabSettings_894967889413237" class="settingsContainer dashSettings" style="display:none;">
                <div id="topnav" class="topnav">
                    <a href="javascript:void(0)" class="signin" onclick="toggleTabSettingsMenu('894967889413237',true);">
                        <span><img src="/Content/Images/icon_gear.png" alt="Edit"></span>
                    </a>
                </div>
                <fieldset id="dbTabSettingsMenu_894967889413237" class="dashSettings-menu"> 
                    <ul class="dashboardEditMenu">
                        <img src="/Content/images/close.png" onclick="toggleTabSettingsMenu('894967889413237',false);" alt="tooltip" style="position:absolute;right:2px;top:2px;border:0;">
                        ...
                   </ul>
            </fieldset>
        </span>
    </li>
</ul>

I don't know if my problem is related to this post I have read, but I can't apply the solution proposed there because my 'ids' are random and I can't write "static" selectors to get them.

crossmax
  • 346
  • 3
  • 20
  • > Elements cannot be reliably serialized when transferring them between different owner documents (in this case, a WebPage instance and the Phantom outer context), period. Grab the data you need from them (e.g. href values from anchors, etc.) and return just that to the Phantom context. If you need to do advanced manipulation, do it within the owner document (WebPage instance) via your `page.evaluate`. – Hamms Apr 03 '18 at 00:10
  • The proposed solution from that post seems correct. Just do the work in `evaluate` – Hamms Apr 03 '18 at 00:10
  • @Hamms Can you tell me how to do it? I'm newbie and the asynchronous calls are killing me – crossmax Apr 03 '18 at 00:23

1 Answers1

1

it's not an issue with document.querySelectorAll() because you have the right number of tabs in console.log('Num Tabs: ' + tabs.length);

it's because of the asynchronous casper.wait(); here's an answer about asynchronous processes in a loop:

The for loop runs immediately to completion while all your asynchronous operations are started. When they complete some time in the future and call their callbacks, the value of your loop index variable i will be at its last value for all the callbacks.

one way to work this out is to use ES6's let instead of var, like

for(let i = 0; i < tabs.length; i++) { ... ES6 var vs let

or use .forEach since it creates its own function closure

for vs .forEach()

replace your for(i = 0; i < tabs.length; i++) { with tabs.forEach(function(tab) { ... and access the tab with tab instead of tabs[i]

here's a snippet to demonstrate it :

var tabs = ['tab1', 'tab2', 'tab3', 'tab4'];

// 1) `var` is function-scoped: every timer callback shares the one `i`,
//    which has already reached tabs.length by the time the timers fire,
//    so tabs[i] is undefined in all four callbacks.
for (var i = 0; i < tabs.length; i++) {
    if (!tabs[i]) {
        console.log('none');
        continue;
    }
    setTimeout(function () { // simulating casper.wait
        console.log('in the for loop with var : ' , tabs[i]);
    }, 1000);
}

// 2) `let` creates a fresh binding of `i` per iteration, so each callback
//    sees the index it was scheduled with.
for (let i = 0; i < tabs.length; i++) {
    if (!tabs[i]) {
        console.log('none');
        continue;
    }
    setTimeout(function () { // simulating casper.wait
        console.log('in the for loop with let : ' , tabs[i]);
    }, 1000);
}

// 3) forEach invokes the callback once per element, so `tab` is captured
//    in its own function closure on every iteration.
tabs.forEach(function (tab) {
    if (!tab) {
        console.log('none');
        return;
    }
    setTimeout(function () { // simulating casper.wait
        console.log('in the forEach loop : ' , tab);
    }, 1000);
});
Taki
  • 17,320
  • 4
  • 26
  • 47
  • I'm testing the changes but not work. If I used `tabs.foreach(function(tab) {` the output is `Num Tabs: 5 TypeError: undefined is not a function (evaluating 'tabs.forEach'` and if I used `for(let i = 0;...) {` the output is `SyntaxError: Expected token 'in'` and I think that `let` doesn't like phantomjs – crossmax Apr 03 '18 at 00:43
  • do you know on what line is the `Expected token 'in'` ? – Taki Apr 03 '18 at 00:51
  • When set in the loop `let i=0` the `SyntaxError: Expected token 'in'` appears and js file is not execute. If change `let` for `var` the js is executed. I've tried so many things that in some I already had the error when trying to create variables with `let` type and I don't know why – crossmax Apr 03 '18 at 07:13
  • I've tried with a `this.each(...) {` that I've read [here](https://stackoverflow.com/questions/40412726/casperjs-iterating-over-a-list-of-links-using-casper-each) but the five tabs selected are `undefined` (null). I never thought that this was so difficult – crossmax Apr 03 '18 at 09:02
  • You just have to read the error you're given, can you post all of it ? The error will tell on what line and which function you're having trouble – Taki Apr 03 '18 at 12:18
  • I run js file from shell with `casperjs logon.js` and just see this message: `SyntaxError: Expected token 'in'`. Nothing about line or more description error – crossmax Apr 03 '18 at 13:15