So I'm building a program that scrapes Poshmark webpages and extracts the usernames of each seller on the page!
I want it to go through every page using the 'next' button, but theres 6 buttons all with the same class name...
Heres the link: https://poshmark.com/category/Men-Jackets_&_Coats?sort_by=like_count&all_size=true&my_size=false
(In my google chrome this page has an infinite scroll (hence the scrollToBottom
async function i started writing) but i realized inside puppeteer's chrome it has 'next page' buttons.)
The window displays page 1-5 and then the 'next page' button.
The problem is that all of the buttons share the same html class name, so I'm confused on how to differentiate.
const e = require('express');
const puppeteer = require('puppeteer');
const url = "https://poshmark.com/category/Men-Jackets_&_Coats?sort_by=like_count&all_size=true&my_size=false";
let usernames = [];
const initItemArea = async (page) => {
const itemArea = await page.evaluate(() => {
return Array.from(document.querySelectorAll('.tc--g.m--l--1.ellipses')).map(x => x.textContent);
});
}
const pushToArray = async (itemArea, page) => {
itemArea.forEach(function (element) {
//console.log('username: ', $(element).text());
usernames.push(element);
});
};
const scrollToBottom = async (itemArea, page) => {
while (true) {
previousHeight = await page.evaluate('document.body.scrollHeight');
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
await new Promise((resolve) => setTimeout(resolve, 1000));
await page.screenshot({path : "ss.png"})
}
};
const gotoNextPage = async (page) => {
await page.waitForSelector(".button.btn.btn--pagination");
const nextButton = await page.evaluate((page) => {
document.querySelector(".button.btn.btn--pagination")
});
await page.click(nextButton);
console.log('Next Page Loading')
};
async function main() {
const client = await puppeteer.launch({
headless: false,
executablePath: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
});
const page = await client.newPage();
await page.goto(url);
await page.waitForSelector(".tc--g.m--l--1.ellipses");
const itemArea = await page.evaluate(() => {
return Array.from(document.querySelectorAll('.tc--g.m--l--1.ellipses')).map(x => x.textContent);
});
gotoNextPage(page)
};
main();
Currently, my gotoNextPage
function doesnt even find the button, so i thought i'd entered the selector wrong...
Then when I went to find the selector, I realized all buttons have the same one anyway...
My html knowledge is basically nonexistent, but I want to finish this project out. All help is very appreciated.
Bonus: my initPageArea
function doesn't work when I call as a function like that, so I hardcoded it into main()...
I'll be diving deep into this problem later on, as I've seen it before, but any quick answers / direction would be awesome.
Thanks a lot.