I'm learning and building a web crawler for my work. So far I have managed to scrape the first record on the website. I want to scrape every record available on the website, but I soon realized that each data row I need to click has a different data key value, so I am currently stuck on how to write a loop that clicks the next row and scrapes its information.
I'm using puppeteer to scrape the data from the website.
Image of the data value that I need to click:
const puppeteer = require ('puppeteer')
const fs = require("fs/promises")
// Id of the one job posting this script targets. It appears inside the element
// ids of the list row and of both detail tabs, so it is kept in a single place.
const JOB_ID = "X2cA7J6ONt2ZNAefl70c1xVDJ";

// First page of the job search listing.
const LIST_URL = "https://startupjobs.asia/job/search?q=&job-list-dpl-page=1";

// Directory that receives one text file per scraped field.
const OUTPUT_DIR = "txt";

// Root element of the job detail view and the ids of its two tab panels.
const DETAIL_WRAPPER = "#suj-single-jobdetail-wrapper";
const JOBS_TAB = `#navtab-${JOB_ID}-jobs`;
const STARTUP_TAB = `#navtab-${JOB_ID}-startup`;

// Link in the listing that opens the targeted job posting.
const JOB_LINK = `#job-post-public-list-item-0-${JOB_ID} > div.col.s12.m9.l9 > div > h5 > a`;

// Second tab link of the detail view; clicking it precedes the startup-tab scrape.
const STARTUP_TAB_LINK = `${DETAIL_WRAPPER} > div.detail-body > div.row > div.col.s12.tabs-wrapper.suj-company-review-tabs-wrapper > ul > li:nth-child(2) > a`;

// Fields read before the second tab is clicked: output file name -> CSS selector.
const JOB_TAB_FIELDS = [
  { file: "jobs.txt", selector: ".detail-header h5" },
  { file: "company.txt", selector: `${DETAIL_WRAPPER} > div.detail-header > div.company-profile.row > div > h6:nth-child(1) > a` },
  { file: "job type.txt", selector: `${JOBS_TAB} > div:nth-child(1) > div:nth-child(3) > p` },
  { file: "salary.txt", selector: `${JOBS_TAB} > div:nth-child(1) > div:nth-child(1) > p` },
  { file: "skills.txt", selector: `${JOBS_TAB} > div:nth-child(1) > div:nth-child(4) > p` },
  { file: "job description.txt", selector: `${JOBS_TAB} > div:nth-child(2) > div:nth-child(1) > div` },
  { file: "job requirement.txt", selector: `${JOBS_TAB} > div:nth-child(2) > div:nth-child(3) > div` },
  { file: "job responsibility.txt", selector: `${JOBS_TAB} > div:nth-child(2) > div:nth-child(2) > div` },
  { file: "industry.txt", selector: `${JOBS_TAB} > div:nth-child(1) > div:nth-child(2) > p` },
  { file: "country.txt", selector: `${DETAIL_WRAPPER} > div.detail-header > div.company-profile.row > div > h6:nth-child(2) > a` },
];

// Fields read after the second tab has been clicked.
const STARTUP_TAB_FIELDS = [
  { file: "website.txt", selector: `${STARTUP_TAB} > div.row.job-categories > div:nth-child(1) > p > a` },
  { file: "about.txt", selector: `${STARTUP_TAB} > div:nth-child(2) > div:nth-child(1) > div > p:nth-child(2)` },
];

/**
 * Reads the text content of every element matched by each field's selector.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {{file: string, selector: string}[]} fields - Fields to read, in order.
 * @returns {Promise<{file: string, texts: string[]}[]>} One entry per field;
 *   `texts` is empty when the selector matches nothing.
 */
async function scrapeFields(page, fields) {
  const scraped = [];
  for (const { file, selector } of fields) {
    const texts = await page.$$eval(selector, (nodes) =>
      nodes.map((node) => node.textContent)
    );
    scraped.push({ file, texts });
  }
  return scraped;
}

/**
 * Opens the job listing, opens the posting identified by JOB_ID, scrapes its
 * detail fields from both tabs and writes each field to its own file under
 * OUTPUT_DIR, one matched element per line (CRLF separated).
 *
 * The browser is closed even when a step fails.
 *
 * @returns {Promise<void>}
 * @throws Rejects when navigation, a click target lookup, or a file write fails.
 */
async function start() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(LIST_URL, { timeout: 3000000 });

    await page.click(JOB_LINK);
    // The click returns before the detail view is necessarily rendered; wait
    // for its root element so the reads below do not race the page.
    await page.waitForSelector(DETAIL_WRAPPER);

    const jobFields = await scrapeFields(page, JOB_TAB_FIELDS);

    // DOM-level click (not a simulated mouse click), as in the original script.
    await page.$eval(STARTUP_TAB_LINK, (link) => link.click());
    const startupFields = await scrapeFields(page, STARTUP_TAB_FIELDS);

    // writeFile does not create missing directories.
    await fs.mkdir(OUTPUT_DIR, { recursive: true });
    await Promise.all(
      [...jobFields, ...startupFields].map(({ file, texts }) =>
        fs.writeFile(`${OUTPUT_DIR}/${file}`, texts.join("\r\n"))
      )
    );
  } finally {
    // Always release the browser process, including on failure.
    await browser.close();
  }
}
// Run the scraper. Handle a rejection explicitly instead of leaving the
// promise floating: report the error and terminate with a failing exit code.
start().catch((err) => {
  console.error(err);
  process.exit(1);
});