I have made a code that parses all URL-s from a page. Next, I would like to get a href from every parsed URL <div class="holder"><a href="THESE URL-s"></a></div>
and output it to a file and sepparate with a comma.
So far I have made this code. It is able to find all the URL-s need to be parsed and collects them to a comma sepparated file called output2.txt.
var resourceWait = 300,
maxRenderWait = 10000,
url = 'URL TO PARSE HREF-s FROM';
var page = require('webpage').create(),
count = 0,
forcedRenderTimeout,
renderTimeout;
page.viewportSize = { width: 1280, height : 1024 };
function doRender() {
var fs = require('fs');
var path = 'output2.txt';
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
fs.write(path,page.evaluate(function() {
return $('.urlDIV').find('a')
.map(function() {
return this.href;})
.get()
.join(',');
}), 'w');
phantom.exit()
});
}
page.onResourceRequested = function (req) {
count += 1;
clearTimeout(renderTimeout);
};
page.onResourceReceived = function (res) {
if (!res.stage || res.stage === 'end') {
count -= 1;
if (count === 0) {
renderTimeout = setTimeout(doRender, resourceWait);
}
}
};
page.open(url, function (status) {
if (status !== "success") {
phantom.exit();
} else {
forcedRenderTimeout = setTimeout(function () {
console.log(count);
doRender();
}, maxRenderWait);
}
});
Thanks in advance,
Martti