I'm trying to scrape the header and footer of a table with Node and cheerio. The entire table HTML is below, and my code is:
/**
 * Logs the text of every header cell of a table next to the text of the
 * footer cell in the same column, and returns the collected pairs.
 *
 * For each column the console receives the header text, the footer text and
 * a `---------` separator, in that order.
 *
 * NOTE: cheerio only parses the markup it is handed; it does not run page
 * scripts. If the footer cells are filled in client-side, they will be empty
 * when `html` is the raw server response rather than markup copied from
 * browser devtools.
 *
 * @param {string} html - Markup containing a table with `thead` and `tfoot` rows.
 * @returns {Promise<Array<{header: string, footer: string}>>} One entry per
 *   header cell; `footer` is `''` when no footer cell exists at that index.
 */
async function getTableFooter(html) {
  // `false` is cheerio's `isDocument` flag: parse `html` as a fragment.
  const $ = cheerio.load(html, null, false);
  const headThs = [...$('thead > tr').find('th')];
  const footThs = [...$('tfoot > tr').find('th')];
  const pairs = [];

  for (const [i, headTh] of headThs.entries()) {
    try {
      const header = $(headTh).text();
      console.log(header);

      // Guard explicitly so a shorter footer row yields an empty string.
      const footTh = footThs[i];
      const footer = footTh === undefined ? '' : $(footTh).text();
      console.log(footer);
      console.log('---------');

      pairs.push({ header, footer });
    } catch (error) {
      // Best effort: report the failing column and continue with the next one.
      console.log(error);
    }
  }

  return pairs;
}
As I step through the code, I see that there are 10 `th` elements for both the header and the footer. When I try to print them out, the header inner text prints as expected, but the footer fields do not. Why not?
<table class="table table-striped table-condensed mt-0 mb-0 p-0 dataTable dtr-inline" width="100%" style="color: black; width: 100%;" id="tblAcctBal" role="grid">
<thead>
<tr class="text-center" role="row"><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 26px;">PAY</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 63px;">TAX YEAR</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 68px;"><a href="#" role="button" class="btn popovers font-weight-bold pop p-0" data-toggle="popover" data-html="true" data-placement="top" data-trigger="focus" style="font-size:12px !important;" data-content="A Certificate Number is the number given when a Lien is purchased on the delinquent taxes of a property.
<b>Certificates must be redeemed in full</b>, and until redeemed, interest will accrue monthly at the percent the Lien Holder was awarded.
<br/>See Arizona Revised Statute <a href='https://www.azleg.gov/viewdocument/?docName=https://www.azleg.gov/ars/42/18104.htm' target='_blank'>42-18104</a> for further information." title="" data-original-title="<b>What is a Certificate?</b>"><u>CERT NO <i class="fa fa-info-circle text-primary"></i></u></a></th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 98px;">INTEREST DATE</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 61px;">INTEREST<br>PERCENT</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 60px;">AMOUNT</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 61px;">INTEREST</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 34px;">FEES</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 70px;">PENALTIES</th><th class="sorting_disabled" rowspan="1" colspan="1" style="width: 73px;">TOTAL DUE</th></tr>
</thead>
<tbody>
<tr class="odd"><td valign="top" colspan="10" class="dataTables_empty">No data available in table</td></tr></tbody>
<tfoot>
<tr><th rowspan="1" colspan="1"></th><th rowspan="1" colspan="1"></th><th rowspan="1" colspan="1"></th><th rowspan="1" colspan="1"></th><th rowspan="1" colspan="1"></th><th rowspan="1" colspan="1">$0.00</th><th rowspan="1" colspan="1">$0.00</th><th rowspan="1" colspan="1">$0.00</th><th rowspan="1" colspan="1">$0.00</th><th rowspan="1" colspan="1">$0.00</th></tr>
</tfoot>
</table>
Edit: the output looks like:
Edit2:
I've rewritten the code as a minimal example (`fetch` refers to node-fetch):
const cheerio = require("cheerio");
const fetch = require('node-fetch');
/**
 * Minimal reproduction: downloads the property inquiry page and logs each
 * header cell of `#tblAcctBal` next to the footer cell in the same column.
 *
 * Wrapped in an async function because top-level `await` is not available in
 * a CommonJS module (this snippet loads its dependencies with `require`).
 *
 * NOTE: cheerio parses only the markup returned by the server and does not
 * execute page scripts, so footer cells that are populated client-side will
 * print as empty strings here even though browser devtools shows values.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the HTTP response status is not successful.
 */
async function main() {
  const r = await fetch('https://www.to.pima.gov/propertyInquiry/?stateCodeB=129&stateCodeM=05&stateCodeP=0070');
  if (!r.ok) {
    // Do not scrape an error page as if it were the real document.
    throw new Error(`Request failed: ${r.status} ${r.statusText}`);
  }
  const body = await r.text();

  // Parse the response once; re-serialising and re-loading it adds nothing.
  const $ = cheerio.load(body);
  const headTHS = [...$('#tblAcctBal > thead > tr > th')];
  const footTHS = [...$('#tblAcctBal > tfoot > tr > th')];

  for (const [i, headTh] of headTHS.entries()) {
    try {
      console.log($(headTh).text());
      console.log($(footTHS[i]).text());
      console.log('---------');
    } catch (error) {
      // Best effort: report the failing column and continue with the next one.
      console.log(error);
    }
  }
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
The html contains the appropriate table including:
but I still do not get the footer fields. The console output looks like the one in the first edit above. @ggorlen, you were right: I initially took the HTML from devtools to make things tidier!