How do I collect scraped data from multiple pages into a list?

Question

I am working on a scraping project using selenium in python. Here is my current code:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.common.exceptions import NoSuchElementException
import csv

driver = webdriver.Chrome("/Users/nzalle/Downloads/chromedriver")
driver.get("https://directory.bcsp.org/")
count = int(input("Number of Pages to Scrape: "))

body = driver.find_element_by_xpath("//body")
profile_count = driver.find_elements_by_xpath("//div[@align='right']/a")

while len(profile_count) < count:   # Get links up to "count"
    body.send_keys(Keys.END)
    sleep(1)
    profile_count = driver.find_elements_by_xpath("//div[@align='right']/a")

for link in profile_count:   # Calling up links
    temp = link.get_attribute('href')   # temp for
    driver.execute_script("window.open('');")    # open new tab
    driver.switch_to.window(driver.window_handles[1])   # focus new tab
    driver.get(temp)

    # Scrape Code
    Name = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[1]/div[2]/div').text or driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[1]/div[2]/div').text

    IssuedBy = "Board of Certified Safety Professionals"

    CertificationNumber = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[1]/td[3]/div[2]').text or driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[1]/td[3]/div[2]').text

    CertfiedSince = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[1]/div[2]').text or driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[1]/div[2]')

    RecertificationCycleORExperation = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[3]/div[2]').text or driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[3]/td[3]/div[2]')

    try:
        AccreditedBy = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[5]/td[3]/div[2]/a').text

    except NoSuchElementException:
        AccreditedBy = "N/A"

    try:
        Expires = driver.find_element_by_xpath('/html/body/table/tbody/tr/td/table/tbody/tr/td[5]/div/table[1]/tbody/tr/td[3]/table/tbody/tr[5]/td[1]/div[2]').text

    except NoSuchElementException:
        Expires = "N/A"

    Data = [Name + "," + IssuedBy + "," + CertificationNumber + "," + CertfiedSince + "," + RecertificationCycleORExperation + "," + Expires + "," + AccreditedBy + '\n']

    with open('data.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        h = ["Name", "Issued By", "Certification Number", "Certified Since", "Recertification Cycle/Expiration",
             "Expires", "Accredited By"]
        writer.writerow(h)
        writer.writerow([Data] * len(h))
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
driver.close()

After I scrape data from one of the profiles, I move on to the next, but the data from the first is lost. I was wondering how I would be able to collect the data from all of the profiles and put them into a list or something so that I can export them all into the CSV file.

Your for loop needs to run while `data.csv` is open. Currently, you're writing over `data.csv` every iteration. If you want to append to an existing csv then: https://stackoverflow.com/questions/4706499/how-do-you-append-to-a-file-in-python — qorka, Jun 16 '20 at 23:54
Is there any way that you could provide an example using my code? Sorry, I am new to this and am still trying to learn — nzalle, Jun 17 '20 at 00:43

How do I collect scraped data from multiple pages into a list?

0 Answers0