1

I have written code that parses all the URLs from a page. Next, I would like to open every parsed URL, get the href from <div class="holder"><a href="THESE URL-s"></a></div> on each of those pages, and write the results to a file, separated by commas.

So far I have written the code below. It finds all the URLs that need to be parsed and collects them into a comma-separated file called output2.txt.

// Delay (ms) to wait after the last outstanding resource finishes before rendering.
var resourceWait = 300;
// Upper bound (ms) after a successful page load before rendering is forced.
var maxRenderWait = 10000;
// Address of the page whose links are extracted.
var url = 'URL TO PARSE HREF-s FROM';

// PhantomJS page instance used for loading and evaluating the target page.
var page = require('webpage').create();
// Number of resource requests that have been issued but not yet completed.
var count = 0;
// Timer handles: the forced (maximum wait) timer and the quiet-period timer.
var forcedRenderTimeout;
var renderTimeout;

page.viewportSize = { width: 1280, height: 1024 };

function doRender() {
    var fs = require('fs');
    var path = 'output2.txt';
    page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
        fs.write(path,page.evaluate(function() {
            return $('.urlDIV').find('a')
            .map(function() {
            return this.href;})
            .get()
            .join(',');
        }), 'w');

        phantom.exit()
    });
}

// A new request means the page is still loading: cancel any pending
// quiet-period render and record one more outstanding resource.
page.onResourceRequested = function (request) {
    clearTimeout(renderTimeout);
    count++;
};

// Called as responses arrive. Once a response is complete the outstanding
// counter is decremented; when it reaches zero, a render is scheduled after
// the quiet period (resourceWait).
page.onResourceReceived = function (response) {
    // Only count a response when it has no stage or its stage is 'end'.
    var isFinished = !response.stage || response.stage === 'end';
    if (!isFinished) {
        return;
    }

    count--;

    if (count !== 0) {
        return;
    }

    renderTimeout = setTimeout(doRender, resourceWait);
};

// Load the target page. On failure, report the problem and exit with a
// non-zero status so callers can tell the run did not produce output.
// On success, arm a fallback timer that forces extraction after
// maxRenderWait even if some resources never finish loading.
page.open(url, function (status) {
    if (status !== "success") {
        console.log('Unable to load ' + url + ' (status: ' + status + ')');
        phantom.exit(1);
        return;
    }

    forcedRenderTimeout = setTimeout(function () {
        // Log how many requests were still outstanding when the wait ran out.
        console.log(count);
        doRender();
    }, maxRenderWait);
});

Thanks in advance,

Martti

  • You mean you want to open all URLs sequentially? See [Looping over urls to do the same thing](http://stackoverflow.com/questions/26681464/looping-over-urls-to-do-the-same-thing) – Artjom B. Sep 21 '15 at 13:56

0 Answers0