I'm using nightmare.js to scrape public records and am just trying to get the scraper to wait for the next page to load. I'm crawling search results which I press a next button to (obviously) get to the next page. I can't use nightmare.wait(someConstTime)
to accurately wait for the next page to load because sometimes someConstTime
is shorter than the time it takes for the next page to load (although it's always under 30 seconds). I also can't use nightmare.wait(selector)
because the same selectors are always present on all result pages. In that case nightmare basically doesn't wait at all because the selector is already present (on the page I already scraped) so it it will proceed to scrape the same page several times unless the new page loads before the next loop.
How can I conditionally wait for the next page to load after I click on the next button?
If I could figure out how - I would compare the "Showing # to # of ## entries" indicator of the current page (currentPageStatus
) to the last known value (lastPageStatus
) and wait until they're different (hence the next page loaded).
(ignore that the example image only has one search result page)
I'd do that using this code from https://stackoverflow.com/a/36734481/3491991 but that would require passing lastPageStatus
into deferredWait
(which I can't figure out).
Here's the code I've got so far:
// Load dependencies
//const { csvFormat } = require('d3-dsv');
const Nightmare = require('nightmare');
const fs = require('fs');
var vo = require('vo');
const START = 'http://propertytax.peoriacounty.org';
var parcelPrefixes = ["01","02","03","04","05","06","07","08","09","10",
"11","12","13","14","15","16","17","18","19"]
vo(main)(function(err, result) {
if (err) throw err;
});
function* main() {
var nightmare = Nightmare(),
currentPage = 0;
// Go to Peoria Tax Records Search
try {
yield nightmare
.goto(START)
.wait('input[name="property_key"]')
.insert('input[name="property_key"]', parcelPrefixes[0])
// Click search button (#btn btn-success)
.click('.btn.btn-success')
} catch(e) {
console.error(e)
}
// Get parcel numbers ten at a time
try {
yield nightmare
.wait('.sorting_1')
isLastPage = yield nightmare.visible('.paginate_button.next.disabled')
while (!isLastPage) {
console.log('The current page should be: ', currentPage); // Display page status
try {
const result = yield nightmare
.evaluate(() => {
return [...document.querySelectorAll('.sorting_1')]
.map(el => el.innerText);
})
// Save property numbers
// fs.appendFile('parcels.txt', result, (err) => {
// if (err) throw err;
// console.log('The "data to append" was appended to file!');
// });
} catch(e) {
console.error(e);
return undefined;
}
yield nightmare
// Click next page button
.click('.paginate_button.next')
// ************* THIS IS WHERE I NEED HELP *************** BEGIN
// Wait for next page to load before continue while loop
try {
const currentPageStatus = yield nightmare
.evaluate(() => {
return document.querySelector('.dataTables_info').innerText;
})
console.log(currentPageStatus);
} catch(e) {
console.error(e);
return undefined;
}
// ************* THIS IS WHERE I NEED HELP *************** END
currentPage++;
isLastPage = yield nightmare.visible('.paginate_button.next.disabled')
}
} catch(e) {
console.error(e)
}
yield nightmare.end();
}