My local government has thrown the idea of open public data into the bin and chosen instead to publish its COVID-19 updates via an online Power BI dashboard (with no download option). The dashboard lacks a static URL, but you can access it here.
When published online, Power BI dashboards have a complex HTML structure that doesn't seem to respond to scrapers the way a normal HTML document does.
Using Puppeteer (a Node.js module), I can scrape some elements from the first page of the dashboard. What I really need is to access the second page. To get this, I need to 'click' on the right arrow at the bottom of the screen. This can be selected successfully with `document.querySelector(".pbi-glyph-chevronrightmedium")`.
However, I can't interact with that element to reach the second page. While Puppeteer can find it, it can't click it. The first line here works; the second doesn't:
await page.waitForSelector(".pbi-glyph-chevronrightmedium");
await page.click(".pbi-glyph-chevronrightmedium");
Any ideas? There was another question published about scraping from a Power BI dashboard but it covered different aspects. I can't perform the simple task of clicking the 'Next page' arrow.
const puppeteer = require("puppeteer");
/**
 * Opens the ACT Government COVID-19 updates page, reads the href of the first
 * element matching ".col-md-12 a", navigates to that URL, waits for the
 * ".pbi-glyph-chevronrightmedium" element and clicks it.
 *
 * The browser is closed whether the steps succeed or fail, so a failing step
 * (for example a click that Puppeteer rejects) no longer leaves a browser
 * process running.
 *
 * @returns {Promise<string>} Resolves with "end scrape" after the click.
 * @throws Rejects with the error raised by whichever Puppeteer call failed.
 */
async function scrape() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    await page.goto("https://www.covid19.act.gov.au/updates/confirmed-case-information");
    await page.waitForSelector(".col-md-12 a");
    // The dashboard link is read from the page at run time rather than hard-coded.
    const dashboardUrl = await page.evaluate(
      () => document.querySelector(".col-md-12 a").href
    );

    await page.goto(dashboardUrl);
    await page.waitForSelector(".pbi-glyph-chevronrightmedium");
    console.log("Found the arrow!");
    // NOTE(review): if this click keeps failing even though the selector is
    // found, a DOM-level click (page.$eval(selector, (el) => el.click())) may
    // be worth trying — unverified against the live dashboard.
    await page.click(".pbi-glyph-chevronrightmedium");
    console.log("Clicked the arrow!");

    return "end scrape";
  } finally {
    // Runs on success and on failure; awaited so the process can exit cleanly.
    await browser.close();
  }
}
// Run the scrape: print the resolved message, or print the error if any step rejects.
scrape()
  .then((message) => {
    console.log(message);
  })
  .catch((failure) => {
    console.error(failure);
  });