0

I'm learning to build a web crawler for my work. So far I have managed to scrape the first record on the website. I want to scrape every record available on the website, but I soon realized that each data row I need to click has a different data key value, so I am currently stuck on how to write a loop that clicks the next row and scrapes its information.

I'm using puppeteer to scrape the data from the website.

Image of the data value that I need to click

const puppeteer = require ('puppeteer')
const fs = require("fs/promises")

/**
 * Evaluate `document.querySelectorAll(selector)` inside the page and return
 * the `textContent` of every matching element.
 *
 * @param {object} page - Puppeteer page to evaluate in.
 * @param {string} selector - CSS selector to match.
 * @returns {Promise<string[]>} One entry per match; empty when nothing matches.
 */
async function readTexts (page, selector){
    return page.evaluate(
        (sel) => Array.from(document.querySelectorAll(sel)).map(x => x.textContent),
        selector
    )
}

/**
 * Open the job search page, click one job in the list, scrape its detail
 * fields and its "startup" tab, and write each field to its own file under
 * `txt/`.
 *
 * Called with no arguments it scrapes the same job as before.
 *
 * @param {string} [jobKey] - Key embedded in the list-item id and the
 *   `navtab-…` ids. NOTE(review): assumes every job uses the same id pattern
 *   as the default one — confirm against the live page before looping.
 * @param {number} [itemIndex] - Position segment of the list-item id.
 * @returns {Promise<void>} Resolves once all files are written and the
 *   browser is closed.
 * @throws Rejects with whatever Puppeteer or `fs` throws; the browser is
 *   closed first in every case.
 */
async function start (jobKey = "X2cA7J6ONt2ZNAefl70c1xVDJ", itemIndex = 0){
    const jobsTab = `#navtab-${jobKey}-jobs`
    const startupTab = `#navtab-${jobKey}-startup`
    const companyProfile = "#suj-single-jobdetail-wrapper > div.detail-header > div.company-profile.row > div"

    // [output file, selector] pairs read from the job detail view.
    const jobFields = [
        ["txt/jobs.txt", ".detail-header h5"],
        ["txt/company.txt", `${companyProfile} > h6:nth-child(1) > a`],
        ["txt/job type.txt", `${jobsTab} > div:nth-child(1) > div:nth-child(3) > p`],
        ["txt/salary.txt", `${jobsTab} > div:nth-child(1) > div:nth-child(1) > p`],
        ["txt/skills.txt", `${jobsTab} > div:nth-child(1) > div:nth-child(4) > p`],
        ["txt/job description.txt", `${jobsTab} > div:nth-child(2) > div:nth-child(1) > div`],
        ["txt/job requirement.txt", `${jobsTab} > div:nth-child(2) > div:nth-child(3) > div`],
        ["txt/job responsibility.txt", `${jobsTab} > div:nth-child(2) > div:nth-child(2) > div`],
        ["txt/industry.txt", `${jobsTab} > div:nth-child(1) > div:nth-child(2) > p`],
        ["txt/country.txt", `${companyProfile} > h6:nth-child(2) > a`],
    ]

    // [output file, selector] pairs read after switching to the second tab.
    const startupFields = [
        ["txt/website.txt", `${startupTab} > div.row.job-categories > div:nth-child(1) > p > a`],
        ["txt/about.txt", `${startupTab} > div:nth-child(2) > div:nth-child(1) > div > p:nth-child(2)`],
    ]

    const browser = await puppeteer.launch()

    // try/finally so the browser process is closed even when a step throws.
    try {
        const page = await browser.newPage()

        await page.goto("https://startupjobs.asia/job/search?q=&job-list-dpl-page=1", {timeout: 3000000})

        // NOTE(review): nothing waits for the detail view after this click;
        // if the fields come back empty, a wait on the detail header may be
        // needed — confirm against the live page.
        await page.click(`#job-post-public-list-item-${itemIndex}-${jobKey} > div.col.s12.m9.l9 > div > h5 > a`)

        const outputs = []
        for (const [file, selector] of jobFields) {
            outputs.push([file, await readTexts(page, selector)])
        }

        // Switch to the second tab before reading the startup fields.
        await page.evaluate(() => document.querySelector("#suj-single-jobdetail-wrapper > div.detail-body > div.row > div.col.s12.tabs-wrapper.suj-company-review-tabs-wrapper > ul > li:nth-child(2) > a").click())

        for (const [file, selector] of startupFields) {
            outputs.push([file, await readTexts(page, selector)])
        }

        // writeFile does not create parent directories; make sure txt/ exists.
        await fs.mkdir("txt", { recursive: true })

        for (const [file, lines] of outputs) {
            await fs.writeFile(file, lines.join("\r\n"))
        }
    } finally {
        await browser.close()
    }
}

// Report a failed run explicitly and signal it through the exit code instead
// of leaving the promise returned by start() unhandled.
start().catch((err) => {
    console.error(err)
    process.exitCode = 1
})
Wave
  • 19
  • 7

0 Answers0