from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import time
import pandas as pd
from pandas import ExcelWriter
# Scrape the per-district price tables from joinsland and export one Excel
# sheet per district (gugun) selected in the second drop-down.
#
# NOTE(fix): the original script built sheets in an openpyxl Workbook AND
# wrote data through a pandas ExcelWriter to the same path, then called
# wb.save(filename) last — which overwrote the pandas output with the empty
# openpyxl workbook. That is why the file and sheets existed but held no
# data. Only the pandas ExcelWriter is used now; it creates the sheets.

# `total` holds one list per table row for the current district;
# `result` is the per-row scratch list of cell texts.
total = []
result = []

# Output path; every sheet goes through this single ExcelWriter.
filename = '/Users/sungyeon/Desktop/projects/text.xlsx'
writer = pd.ExcelWriter(filename)

driver = webdriver.Chrome('./chromedriver')
target_url = 'https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05'
driver.get(target_url)

# First drop-down: select the first province (sido).
select1 = Select(WebDriverWait(driver, 5).until(
    EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
select1.options[0].click()

# Second drop-down: the districts (gugun) we iterate over below.
select2 = Select(WebDriverWait(driver, 5).until(
    EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))

# Iterate over the first three district options.
for i in range(0, 3):
    try:
        select2.options[i].click()
        title = select2.options[i].text
        driver.implicitly_wait(5)
        driver.find_element_by_class_name('btn_search').click()
    except StaleElementReferenceException:
        # The page re-rendered the <select>; re-locate it and retry once.
        select2 = Select(WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
        select2.options[i].click()
        title = select2.options[i].text
        driver.find_element_by_class_name('btn_search').click()
        driver.implicitly_wait(5)

    # Scrape the result table for the currently selected district.
    table = driver.find_element_by_class_name('tbl_box')
    tbody = table.find_element_by_tag_name('tbody')
    rows = tbody.find_elements_by_tag_name('tr')

    # Reset per district so each sheet contains only its own rows
    # (the original accumulated every district into one list).
    total = []
    for row in rows:
        cells = row.find_elements_by_tag_name('td')
        for j in range(len(cells)):
            try:
                result.append(cells[j].text)
            except StaleElementReferenceException:
                # Cell went stale mid-read: re-fetch the row's cells
                # and move on to the next cell.
                cells = row.find_elements_by_tag_name('td')
                continue
        total.append(result)
        result = []  # clear the per-row scratch list

    time.sleep(2)

    # One sheet per district, named after the drop-down option text.
    # Writing inside the loop means every district gets its sheet, not
    # just the last one.
    df = pd.DataFrame.from_records(total)
    df.to_excel(writer, sheet_name=title)

# Save once, after all sheets have been written. Do NOT save any other
# workbook object to `filename` afterwards.
writer.save()
driver.quit()
# I have tried web scraping with Selenium. The data-collection part works
# fine, but the problem is that no data ends up in the exported Excel file.
# The file and the sheets are created correctly, yet every sheet is empty.
# I suspect it has something to do with indentation, but I can't figure it
# out. Any tips would be appreciated — many thanks!