As a follow-up to my earlier question, "Unable to extract footer Fields with cheerio", I'm trying to scrape the header and footer of a table with Node and Puppeteer. I'm eventually going to turn it into a serverless function. For now, my code is:
const puppeteer = require("puppeteer-core");
const chromium = require("@sparticuz/chromium");
exports.handler = async function (event, context) {
// const body = JSON.parse(event.body); // postencoded
// const apn = body.apn;
console.log('starting ', apn);
const browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
executablePath: await chromium.executablePath(),
headless: chromium.headless,
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
const url = "https://www.to.pima.gov/propertyInquiry/?stateCodeB=129&stateCodeM=05&stateCodeP=0070";
await page.goto(url, { waitUntil: 'networkidle0' });
await delay(2000);
const data = await page.evaluate(() => {
let headTHS = Array.from(document.querySelectorAll("#tblAcctBal > thead > tr > th"));
let footTHS = Array.from(document.querySelectorAll("#tblAcctBal > tfoot > tr > th"));
return { "headTHS": headTHS, "footTHS": footTHS }
});
let myArr = [];
console.log('---------');
console.log(data);
// console.log(data.headTHS);
// console.log(data.footTHS);
console.log('---------');
for (let i = 0; i < data.headTHS.length; i++) {
try {
let headTh = data.headTHS[i].textContent;
// console.log(headTh);
let footTh = data.footTHS[i].textContent;
// console.log(footTh);
console.log('---------');
myArr.push({ headTH: footTH });
} catch (error) {
// console.log(error);
myArr = error;
}
}
// const pageTitle = await page.title();
await browser.close();
// console.log(pageTitle);
return {
statusCode: 200,
body: JSON.stringify({
message: myArr
})
}
};
output:
{
headTHS: [
{ jQuery111105698387845515578: 79 },
{ jQuery111105698387845515578: 80 },
{ jQuery111105698387845515578: 81 },
{ jQuery111105698387845515578: 82 },
{ jQuery111105698387845515578: 83 },
{ jQuery111105698387845515578: 84 },
{ jQuery111105698387845515578: 85 },
{ jQuery111105698387845515578: 86 },
{ jQuery111105698387845515578: 87 },
{ jQuery111105698387845515578: 88 }
],
footTHS: [
{}, {}, {}, {}, {},
{}, {}, {}, {}, {}
]
}
[
I've tried the selectors in the DevTools console and they appear to be correct. How do I access the text in the header and footer `th` tags?