Recently, I have been experimenting with the phantomjs-node library. What I wanted to achieve was to create a dynamic web-page template, use the phantomjs-node library to "run" it, and finally extract some data from the rendered page.
In the simplest setting, my first attempt looked like this (in the example below the template is just static; in principle, however, it might contain further logic that uses external libraries, etc.):
// First attempt: render a static HTML template in PhantomJS and read the text
// of the #results element once the page has finished loading.
var phantom = require('phantom');
var co = require('co');
// NOTE: `sleep` is required here but not referenced anywhere in this snippet.
var sleep = require('system-sleep');
var winston = require('winston');
// Console logger at 'debug' level; each line is prefixed with a
// locale-formatted timestamp and emitted as plain text (json: false).
const logger = new winston.Logger({
level: 'debug',
transports: [new winston.transports.Console({
json: false, timestamp: () => (new Date()).toLocaleString()
})]
});
// co drives the generator: execution pauses at each `yield` until the yielded
// promise settles, then resumes with its value.
co(function*() {
logger.info('start');
var instance = yield phantom.create();
try {
// Template to render; static here, but it stands in for a dynamic page.
const html = `
<!DOCTYPE html>
<html>
<head>
<title>Page title</title>
</head>
<body>
<div id='results'>Page data</div>
</body>
</html>
`;
var page = yield instance.createPage();
// The `yield` here only waits for the handler to be registered, not for the
// event to fire.
yield page.on('onLoadFinished', function(){
logger.info('onLoadFinished');
// The promise returned by page.evaluate is handled with then/catch inside
// this callback and is never yielded, so nothing in the generator waits
// for it.
page.evaluate(function(){
return document.getElementById('results').textContent;
}).then(function(val){
logger.info(`RESULT = ${val}`);
}).catch(function(val){
logger.error(val.message);
});
});
yield page.setContent(html, 'http://localhost');
}catch (e){
logger.error(e.message);
}finally{
// Runs as soon as the try block completes, i.e. right after the setContent
// yield resumes. It does not wait for the evaluate promise started in the
// onLoadFinished handler, and the result of exit() itself is not yielded.
instance.exit();
}
logger.info('done');
});
However, this fails with the output:
12/18/2017, 2:44:32 PM - info: start
12/18/2017, 2:44:33 PM - info: done
12/18/2017, 2:44:33 PM - info: onLoadFinished
12/18/2017, 2:44:33 PM - error: Phantom process stopped with exit code 0
most likely because, by the time the `then` callback of the promise returned by `page.evaluate` is finally invoked, the main phantom process has already exited.
In order to "fix" this, I resorted to the following improvised strategy (omitting the rest of the example below):
var page = yield instance.createPage();
// Deferred-style promise: capture `resolve` so the onLoadFinished callback can
// settle P from outside the executor. `reject` is never captured, so P can only
// be rejected indirectly, through the promise passed to resolver().
var resolver;
var P = new Promise(function(resolve, reject){ resolver = resolve; });
yield page.on('onLoadFinished', function(){
logger.info('onLoadFinished');
// Resolving P with the promise returned by page.evaluate makes P follow it:
// P fulfils with the evaluated text, or rejects if evaluate rejects.
// An exception thrown in this callback before resolver() is called would
// leave P pending forever.
resolver(page.evaluate(function(){
return document.getElementById('results').textContent;
}));
});
yield page.setContent(html, 'http://localhost');
// Suspends the generator until the onLoadFinished handler has run and the
// evaluate result is available. If the event never fires, this never resumes.
const val = yield P;
logger.info(`RESULT = ${val}`);
This essentially creates a new promise which is "externally" resolved with the promise returned from `page.evaluate`. The `yield P` statement at the end of the `co` block then blocks until the required result is ready, thus the output is as expected:
12/18/2017, 2:53:47 PM - info: start
12/18/2017, 2:53:48 PM - info: onLoadFinished
12/18/2017, 2:53:48 PM - info: RESULT = .....
12/18/2017, 2:53:48 PM - info: done
Although this seems to work, it feels quite "hacky" (for example, exceptions thrown in the callback before the invocation of `resolver` won't be detected in the main `try/catch` block), so I was wondering what would be a cleaner approach to "transfer" control from the `onLoadFinished` callback back into the realm managed by `co`?