6

Based on this response, is there a way (like with casperjs/phantomjs) to add our custom functions in page.evaluate() context?

For example, include a file with a helper function x that evaluates an XPath expression: x('//a/@href')

ggorlen
  • 44,755
  • 7
  • 76
  • 106
MevatlaveKraspek
  • 2,246
  • 3
  • 20
  • 23

2 Answers

8

You can register helper functions in the browser context with a separate page.evaluate() call. page.exposeFunction() looks tempting, but the exposed function runs in Node.js rather than in the browser context, so it has no access to the document object that these helpers need.

Here is an example of registering helper functions like $x() in the browser context:

const puppeteer = require('puppeteer');

/**
 * Installs the $x() helper on `window`.
 *
 * Meant to be handed to page.evaluate() so that it runs inside the page:
 * it relies on the `window`, `document` and `XPathResult` globals.
 * After it has run, $x(xPath) evaluates the expression against the whole
 * document (FIRST_ORDERED_NODE_TYPE) and returns the result's
 * `singleNodeValue`.
 */
const addHelperFunctions = () => {
    window.$x = (xPath) => {
        const firstMatch = document.evaluate(
            xPath,
            document, // context node: search the entire document
            null, // no namespace resolver
            XPathResult.FIRST_ORDERED_NODE_TYPE,
            null // do not reuse an existing XPathResult
        );

        return firstMatch.singleNodeValue;
    };
};

// Opens the Wikipedia front page, registers the $x() helper in the page and
// prints the text of the element whose id is "mp-tfa".
(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

        // Define $x() inside the page that is loaded right now.
        await page.evaluate(addHelperFunctions);

        const text = await page.evaluate(() => {
            // $x() is now available
            const featureArticle = $x('//*[@id="mp-tfa"]');

            // Return null when the XPath matched nothing instead of throwing
            // a TypeError inside the page.
            return featureArticle ? featureArticle.textContent : null;
        });
        console.log(text);
    } finally {
        // Always shut the browser down, even when a step above failed,
        // so no browser process is left running.
        await browser.close();
    }
})();

You can also keep helpers in a separate file and inject them into the browser context using page.addScriptTag().

Here is an example of it:

helperFunctions.js

// Browser-side helper: evaluates an XPath expression against the whole
// document (FIRST_ORDERED_NODE_TYPE) and returns the result's singleNodeValue.
window.$x = (xPath) => {
    const firstMatch = document.evaluate(
        xPath,
        document, // context node: search the entire document
        null, // no namespace resolver
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null // do not reuse an existing XPathResult
    );

    return firstMatch.singleNodeValue;
};

And use it:

const puppeteer = require('puppeteer');

// Opens the Wikipedia front page, injects ./helperFunctions.js into it and
// prints the text of the element whose id is "mp-tfa".
(async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });

        // Load the helper file into the page as a <script> tag.
        await page.addScriptTag({ path: './helperFunctions.js' });

        const text = await page.evaluate(() => {
            // $x() is now available
            const featureArticle = $x('//*[@id="mp-tfa"]');

            // Return null when the XPath matched nothing instead of throwing
            // a TypeError inside the page.
            return featureArticle ? featureArticle.textContent : null;
        });
        console.log(text);
    } finally {
        // Always shut the browser down, even when a step above failed,
        // so no browser process is left running.
        await browser.close();
    }
})();
ggorlen
  • 44,755
  • 7
  • 76
  • 106
Everettss
  • 15,475
  • 9
  • 72
  • 98
0

Another solution is based on getElementByXPath() and getElementsByXPath(). The advantage is that we can evaluate an XPath expression against a particular node (passed as the second argument).

// Browser-side helper: evaluates an XPath expression against the whole
// document (FIRST_ORDERED_NODE_TYPE) and returns the result's singleNodeValue.
window.$x = (xPath) => {
    const firstMatch = document.evaluate(
        xPath,
        document, // context node: search the entire document
        null, // no namespace resolver
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null // do not reuse an existing XPathResult
    );

    return firstMatch.singleNodeValue;
};

/**
 * Returns the first node matched by an XPath expression.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *     falls back to `document`.
 * @returns {Node|undefined} Item 0 of the ordered snapshot, or `undefined`
 *     when the snapshot is empty.
 */
window.getElementByXPath = function getElementByXPath(expression, scope) {
    const contextNode = scope ? scope : document;
    const snapshot = document.evaluate(
        expression,
        contextNode,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
    );

    return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
};

/**
 * Returns every node matched by an XPath expression, in snapshot order.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *     falls back to `document`.
 * @returns {Node[]} Array of the snapshot items (empty when nothing matched).
 */
window.getElementsByXPath = function getElementsByXPath(expression, scope) {
    const contextNode = scope ? scope : document;
    const snapshot = document.evaluate(
        expression,
        contextNode,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
    );

    // Copy the snapshot items into a plain array, index by index.
    return Array.from(
        { length: snapshot.snapshotLength },
        (_, index) => snapshot.snapshotItem(index)
    );
};

Real-life code sample:

const puppeteer = require('puppeteer');

// Loads the rich-list page, injects ./helperFunctions.js and prints, as JSON,
// the "hash" and "balance" cell texts of up to 100 table rows, keyed by row
// index (starting at 1).
(async () => {

    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();

        await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });
        await page.addScriptTag({ path: './helperFunctions.js' });

        const result = await page.evaluate(() => {
            const obj = {};
            const data = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');

            // Row 0 is skipped (presumably the header row). Never index past
            // the rows that actually exist: with an undefined scope the helper
            // would silently search the whole document instead of the row.
            const lastRow = Math.min(100, data.length - 1);
            for (let i = 1; i <= lastRow; i++) {
                const hashCell = getElementByXPath('./td/a', data[i]);
                const balanceCell = getElementByXPath('./td[3]', data[i]);

                // Skip rows that lack the expected cells rather than
                // throwing a TypeError inside the page.
                if (!hashCell || !balanceCell) {
                    continue;
                }

                obj[i] = {
                    "hash": hashCell.innerText,
                    "balance": balanceCell.innerText
                };
            }

            return obj;

        });
        console.log(JSON.stringify(result, null, 4));
    } finally {
        // Always shut the browser down, even when a step above failed,
        // so no browser process is left running.
        await browser.close();
    }

})();
MevatlaveKraspek
  • 2,246
  • 3
  • 20
  • 23