I have some code that uses Puppeteer to grab random words from a website that generates them. The script simulates a click on a drop-down, a text entry, and a button click, then grabs the text generated on the next page and saves it to a .json file in the cwd.
I am trying to figure out the best way to display some form of progress indicator, such as a percentage, while the script runs (I would like to run the Puppeteer code in headless mode), but I cannot figure out how to get that to work with my current setup. Does anyone have any suggestions? Also, this is my first quarter in JS, so I understand I may have done things in ways that are not polished; please be kind.
// instantiate puppeteer
const puppeteer = require('puppeteer');
/**
 * Scrapes a list of random words from the word-list generator page and saves
 * them as a JSON array to `./words.json` in the current working directory.
 *
 * Steps: launch a browser, open the generator URL, click the results-count
 * drop-down and type '50' into it, click the generate button and wait for the
 * resulting navigation, then collect the trimmed text of every result row.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser
 *   has been closed.
 * @throws {Error} If the results-count drop-down is not found on the page.
 *   Errors from Puppeteer (launch, navigation, clicks) and from the file write
 *   propagate to the caller; the browser is closed in every case.
 */
async function launchSearch() {
  // website that generates random words where the words will come from
  const url = 'https://www.sodacoffee.com/words/list-generator';
  // selector of the button that generates a new list of words
  const buttonClick = '#ctl00_ContentPane_btn';
  // selector of the drop-down asking how many results we want
  const numResultsPerClick = '#ctl00_ContentPane_resultscounter';
  // file handling
  const fs = require('fs');

  const browser = await puppeteer.launch({
    // headless: false shows the browser window while the script runs
    headless: false,
  });

  // try/finally guarantees the browser process is shut down even when one of
  // the steps below throws; otherwise a failed run leaves it running.
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // page.$ resolves to null when nothing matches, so fail with a clear
    // message instead of a TypeError on the .click() call below.
    const numWordsScrape = await page.$(numResultsPerClick);
    if (numWordsScrape === null) {
      throw new Error(`Element not found: ${numResultsPerClick}`);
    }

    // focus the drop-down, then type 50 to ask for 50 random words
    await numWordsScrape.click();
    await numWordsScrape.type('50');

    // Start waiting for the navigation before clicking, so the navigation
    // triggered by the click cannot finish before we begin listening for it.
    await Promise.all([
      page.waitForNavigation(),
      page.click(buttonClick),
    ]);

    // Runs inside the page: map every result-table row to its trimmed text.
    const textAfterButtonClick = await page.evaluate(() =>
      Array.from(
        document.querySelectorAll('#ctl00_ContentPane_GridView1 tbody tr')
      ).map((elem) => elem.innerText.trim())
    );

    // writeFileSync is synchronous and takes no callback: on failure it
    // throws, and the error propagates to the caller of launchSearch.
    fs.writeFileSync('./words.json', JSON.stringify(textAfterButtonClick));
  } finally {
    // close instance of browser
    await browser.close();
  }
}
// Run the scraper. launchSearch returns a promise, so handle its rejection
// here: log the error and mark the process as failed rather than leaving the
// rejection unhandled.
launchSearch().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});