As the title suggests, I'm trying to scrape data from a public tableau table. Use https://health.wyo.gov/publichealth/infectious-disease-epidemiology-unit/disease/novel-coronavirus/covid-19-map-and-statistics/ for this example. This is the source of the iframe that contains the Tableau table, which I use in my code.
I've looked at this and this which both have similar answers by the same person (@Bertrand Martel). I need this in js, so I've converted it to the following code:
const puppeteer=require('puppeteer');
const fetch=require('node-fetch');
wy = async function() {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto("https://public.tableau.com/views/EpiCOVIDtest/Dashboard?:embed=y&:showVizHome=no&:host_url=https%3A%2F%2Fpublic.tableau.com%2F&:embed_code_version=3&:tabs=no&:toolbar=no&:animate_transition=yes&:display_static_image=no&:display_spinner=no&:display_overlay=yes&:display_count=yes&publish=yes&:loadOrderID=0", {waitUntil: "networkidle2"});
await page.setViewport({ width: 1366, height: 663 });
let url = await page.evaluate(() => {
let props = JSON.parse(document.querySelectorAll('#tsConfigContainer')[0].innerText);
let sessid = props['sessionid'];
let root = props['vizql_root'];
return 'https://public.tableau.com'+root+'/bootstrapSession/sessions/'+sessid;
});
const getTableauData = await fetch(url);
await browser.close();
return getTableauData;
}
wy()
.then(function(result) {
console.log(result);
})
But this just returns
Response {
size: 0,
timeout: 0,
[Symbol(Body internals)]: {
body: Gunzip {
_writeState: [Uint32Array],
_readableState: [ReadableState],
readable: true,
_events: [Object: null prototype],
_eventsCount: 3,
_maxListeners: undefined,
_writableState: [WritableState],
writable: false,
allowHalfOpen: true,
_transformState: [Object],
_hadError: false,
bytesWritten: 238,
_handle: [Zlib],
_outBuffer: <Buffer 0d 0a 0d 0a 0d 0a 3c 73 63 72 69 70 74 20 74 79 70 65 3d 22 74 65 78 74 2f 6a 61 76 61 73 63 72 69 70 74 22 20 73 72 63 3d 22 68 74 74 70 73 3a 2f 2f ... 16334 more bytes>,
_outOffset: 297,
_chunkSize: 16384,
_defaultFlushFlag: 2,
_finishFlushFlag: 2,
_defaultFullFlushFlag: 3,
_info: undefined,
_level: -1,
_strategy: 0
},
disturbed: false,
error: null
},
[Symbol(Response internals)]: {
url: 'https://public.tableau.com/vizql/w/EpiCOVIDtest/v/Dashboard/bootstrapSession/sessions/52B9109EE67042418FB717DEAA0E9819-0:0',
status: 404,
statusText: 'Not Found',
headers: Headers { [Symbol(map)]: [Object: null prototype] },
counter: 0
}
}
yet somehow Bertrand's code returns the proper data. What's even more interesting is that in Bertrand's code, the url it constructs (with the root and session id) leads to a "Page not found" page, and so does mine, yet Bertrand is able to get the data from that page, while I can't get data from my page.
Does anyone know how to help?
Thanks.