0

I use CasperJS to grab a test.

The algorithm is: open the URL, parse the page, then click a button to load the next page. How can I keep grabbing the next pages until the test is complete? The questions are served in random order, so I don't know the next question until the form has been submitted.

I need to parse the pages in a loop or recursively. My code is:

// Step 1: open startUrl, click the #training element and then click the
// first submit input on the page.
casper.start(startUrl, function () {
    this.click('#training');
    // evaluate() runs this callback inside the loaded page, so `$` here is
    // whatever the page itself defines (presumably jQuery -- confirm).
    this.evaluate(function () {
        $('input[type="submit"]:first').click();
    });
});

// Step 2: read the current page, extract the correct answer from an inline
// script and submit the form with it. The step is registered only once, so
// it runs for a single page.
casper.then(function () {
    var currentUrl = this.getCurrentUrl(),
        startIdPos = currentUrl.indexOf('=') + 1,
        // Everything after the first '=' in the URL; not used further in this snippet.
        questionId = currentUrl.slice(startIdPos),
        // NOTE(review): `$` is called in the Casper script scope here, so it
        // must be provided by code that is not shown -- confirm.
        content = $(this.getHTML()),
        // Declared but not used further in this snippet.
        answers = [],
        question,
        // Position of the 'var bc=' marker inside the matched script's source.
        startCorrectAnswerPos = content.find('script:nth-child(2)').html().indexOf('var bc='),
        // 'var bc=' is 7 characters long, so +8 skips one more character after
        // it (presumably an opening quote -- confirm) and exactly one character
        // is taken as the answer. If the marker is absent, indexOf yields -1
        // and the character at index 7 is taken instead.
        correctAnswer = content.find('script:nth-child(2)').html().slice(startCorrectAnswerPos + 8, startCorrectAnswerPos + 9);

    // Question markup; assigned but not used further in this snippet.
    question = content.find('table.quizz p.qw').html();

    console.log(">>>>>>" + this.getCurrentUrl());

    // The third argument (true) asks fill() to submit the form after filling it.
    this.fill('form', {
        'answer': correctAnswer
    }, true);
});

// Execute the steps registered above.
casper.run();

This code parses only one page completely; it doesn't move on to the next page and parse it. What am I doing wrong?

Eugene
  • 1,690
  • 3
  • 16
  • 30
  • Here is good tutorial explaining how to handle pagination with CasperJS: http://code-epicenter.com/web-scraping-with-casperjs-handling-pagination/ – MrD Oct 18 '15 at 15:37

1 Answer

3

EDIT: You need to nest the steps for the following pages, because on every page you evaluate whether it is necessary to go further. You should also check the URL after you have submitted the form.

/**
 * CasperJS step that answers the quiz question on the current page and then
 * schedules itself again, so pages keep being processed until a page without
 * a question is reached.
 *
 * Must run with the Casper instance as `this` (e.g. `casper.then(answer)`),
 * and needs a jQuery-like `$` to be available in the Casper script scope.
 *
 * The question is looked up before the answer is parsed: on a page that has
 * no question the step simply stops, instead of failing while trying to read
 * a script element that may not be there.
 */
function answer() {
    var MARKER = 'var bc=',
        content,
        question,
        scriptSource,
        markerPos,
        answerPos,
        correctAnswer;

    console.log(">>>>>>" + this.getCurrentUrl());

    content = $(this.getHTML());
    question = content.find('table.quizz p.qw').html();

    // No question on this page: the test is over, so stop the recursion.
    if (!question) {
        return;
    }

    // Query the script element once and reuse its source.
    scriptSource = content.find('script:nth-child(2)').html();
    markerPos = typeof scriptSource === 'string' ? scriptSource.indexOf(MARKER) : -1;

    // Without the marker there is no reliable answer to submit; slicing at a
    // position derived from -1 would send an arbitrary character instead.
    if (markerPos === -1) {
        console.log("Correct answer marker '" + MARKER + "' not found, stopping.");
        return;
    }

    // The answer is the single character that follows the marker plus one
    // extra character (presumably an opening quote).
    answerPos = markerPos + MARKER.length + 1;
    correctAnswer = scriptSource.slice(answerPos, answerPos + 1);

    // Submit the answer in its own step, then process the page that follows.
    this.then(function () {
        this.fill('form', {
            'answer': correctAnswer
        }, true);
    });
    this.then(answer);
}

// Register the first run of the step; answer() itself schedules each following run.
casper.then(answer);

Replace your casper.then block with this code.


Previous Answer: I don't know what kind of button/link #training is, but it may be that you need to wait for the change in the page to occur. You could use the casper.waitForSelector function.

Also I'm not sure why you write

// evaluate() executes the callback inside the loaded page, so `$` is whatever the page defines.
this.evaluate(function () {
    $('input[type="submit"]:first').click();
});

and not simply this.click('input[type="submit"]:first');.

Artjom B.
  • 61,146
  • 24
  • 125
  • 222
  • this is first page like "start test" (this.click('#training');), after that I need press – Eugene Apr 25 '14 at 14:43
  • You mean that the `casper.then` block is executed for the first page correctly? – Artjom B. Apr 25 '14 at 14:52
  • Yes, the first page is correct, but after that I need to submit the page several times (for example 20 times) and the URL does not change. – Eugene Apr 25 '14 at 15:03
  • Thanks a lot. How easy it is! – Eugene Apr 25 '14 at 17:10
  • Keep in mind that you cannot do anything in the same recursion level after you called the next recursion or it will break your brain. – Artjom B. Apr 25 '14 at 17:15