I want to get data within div loop, so that the values can be ordered in correct rows. Also, I want data from entire page, not only from the visible part of the page.
- How can I get Firm_name, Remediation_status, ... from div[@class='sc-kbGplQ bCRLdc']?
- The code below gives less than 20 rows, while total firms are 1800+. How can I scroll page and get data from entire page? Thanks in advance.
ruby = driver.find_elements(By.XPATH, "//div[@class='sc-kbGplQ bCRLdc']")
for i in ruby:
# actions.move_to_element(i).perform()
driver.execute_script("arguments[0].scrollIntoView();", i)
time.sleep(INTERVAL)
try:
Firm_name = [Firm_name.text for Firm_name in i.find_elements(By.XPATH, "//div[1]/h2[@class='sc-idjmjb jDJltL']")]
Remediation_status = [Remediation_status.text for Remediation_status in i.find_elements(By.XPATH, "//div[1]/span[2][@class='sc-iKpIOp iKvkEG']")]
Safety_training = [Safety_training.text for Safety_training in i.find_elements(By.XPATH, "//div[2]/span[2][@class = 'sc-iKpIOp iKvkEG']" )]
Worker_number = [Worker_number.text for Worker_number in i.find_elements(By.XPATH, "//div[1]/h2[@class='sc-bsVVwV gnfeLF']")]
Progress_rate = [Progress_rate.text for Progress_rate in i.find_elements(By.XPATH, "//div[2]/h2[@class= 'sc-bsVVwV gnfeLF']")]
except:
print("na")
#driver.execute_script("window.scrollBy(0,500)","")
time.sleep(INTERVAL)
df1 = pd.DataFrame(data=list(zip(Firm_name, Remediation_status, Safety_training, Progress_rate, Worker_number)), columns=['Firm_name', 'Remediation_status', 'Safety_training', 'Progress_rate', 'Worker_number'])
df1.to_csv('namefirm.csv')