I have a while
loop that is going through a list until there is no more Load more button.
inside the while
loop I have a for
that increases the row count. At the bottom of it I'm putting the name and the other descriptions that i want into an object.
I want the script to skip to the next name on the list if the name has already been scraped (I don't want it to even save the name).
I can't do a continue
because it throws an error that exerciseName
has not been declared yet. I've tried putting the declare at top of the page but then the variables inside of it haven't been declared yet. How can I have it go through the loop and skip the rest of the process if it has already scraped that name?
My code:
for (let i = 2; i < rowsCounts + 1; i++) {
// this is getting the exercise name
const exerciseName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExHeading > a`,
(el) => el.innerText
);
// i've tried to do the continue here but it throws an error as the object hasn't been declared yet
// REST OF THE FANCY CODE HERE
let obj = {
exercise: exerciseName,
exerciseDescription: exerciseDescription,
AlternativeExercise: AlternativeExercise,
};
// I tried doing the continue here so it wouldn't push anything to the list but the problem is
// the script is opening bunch of tabs and it's way too much traffic plus it
// slows down things a lot. So it needs to be at the top so it can skip all those steps.
if (exerciseName !== obj.exercise) {
continue;
}
allData.push(obj);
}
update this is major part of my code:
const LoadMoreButton =
'#js-ex-content > #js-ex-category-body > .ExCategory-results > .ExLoadMore > .bb-flat-btn';
var buttonExists = true;
let allData = [];
while (buttonExists == true) {
const loadMore = true;
const rowsCounts = await page.$$eval(
'.ExCategory-results > .ExResult-row',
(rows) => rows.length
);
// console.log(`row counts = ${rowsCounts}`);
for (let i = 2; i < rowsCounts + 1; i++) {
const exerciseName = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExHeading > a`,
(el) => el.innerText
);
console.log(` ${i} = ${exerciseName}`);
if (exerciseName !== obj.exercise) {
let obj = {
exercise: 'remove',
exerciseDescription: '',
AlternativeExercise: '',
};
continue;
}
let ExerciseLink = await page.$eval(
`.ExCategory-results > .ExResult-row:nth-child(${i}) > .ExResult-cell > .ExHeading > a`,
(el) => el.getAttribute('href')
);
// console.log(`href = ${ExerciseLink}`);
const pageTab = await browser.newPage(); // open new tab
await pageTab.goto('https://www.bodybuilding.com' + ExerciseLink);
await pageTab.waitForSelector('#js-ex-content');
const exerciseDescription = await pageTab.$eval(
'#js-ex-content > .ExDetail > .ExDetail-section > .flexo-container > .grid-8',
(el) => el.innerHTML
);
// console.log(`${exerciseDescription}`)
// this returns the title to alternative exercises
const AlternativeExercise = await pageTab.evaluate(() => {
var links = document.querySelectorAll('h3.ExResult-resultsHeading a');
return Array.from(links).map((links) => { return links.innerHTML });
});
// console.log(`alternative workouts are: = ${AlternativeExercise}`);
// await page.goBack();
await pageTab.close();
let obj = {
exercise: exerciseName,
exerciseDescription: exerciseDescription,
AlternativeExercise: AlternativeExercise,
};
// allData.push(obj);
allData.filter(d => d.exercise !== 'remove');
}
// clicking load more button and waiting 1sec
try {
await page.click(LoadMoreButton);
}
catch (err) {
buttonExists = false;
}
await page.waitForTimeout(1000);
// await page.waitForNavigation({
// waitUntil: 'networkidle0',
// });
}
console.log(allData);
async function fn() {
// json export error part
jsonexport(allData, function (err, csv) {
if (err) return console.error(err);
console.log(csv);
fs.writeFileSync('DetailExercise.csv', csv);
});
}
fn();
await browser.close();