from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# File that the scraped text is written to.
OUTPUT_FILE_NAME = "output0.txt"

# One Chrome session shared by the whole script, plus an explicit-wait helper
# bound to it that gives up after 10 seconds.
driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=10)
def get_text(link_index=0):
    """Open one entry of the law.go.kr result list and return its page text.

    Loads the list page, clicks the ``link_index``-th title link
    (``td.s_tit > a``) found in the result table, waits until the detail
    view's heading shows the first word of that link's title, and returns
    the visible text of ``#viewwrapCenter``.

    Args:
        link_index: Zero-based position of the link among all title links
            in the table, in document order. The default ``0`` is assumed to
            be the link the original ``tr:nth-child(1)`` selector matched
            (TODO confirm against the live page).

    Returns:
        The ``.text`` of the ``#viewwrapCenter`` element.

    Raises:
        IndexError: If ``link_index`` is outside the range of links found.
        selenium.common.exceptions.TimeoutException: If any of the waits
            expires before its condition is met.
    """
    # Reload the list page on every call, so no explicit "back" navigation is needed.
    driver.get("http://law.go.kr/precSc.do?tabMenuId=tab67")

    # Select the title link of *every* row instead of hard-coding
    # ``tr:nth-child(N)``: the rows that carry a link are reportedly at
    # irregular positions (1, 3, 5, 9, ...), so the position within this list
    # is the only stable way to address "the n-th link".
    links = wait.until(EC.presence_of_all_elements_located(
        (By.CSS_SELECTOR, "#viewHeightDiv > table > tbody > tr > td.s_tit > a")))
    if not 0 <= link_index < len(links):
        raise IndexError(
            "link_index %d out of range (%d links found)" % (link_index, len(links)))

    # Wait until the chosen link is displayed before reading or clicking it.
    elem = wait.until(EC.visibility_of(links[link_index]))
    title = elem.text.strip().split(" ")[0]
    elem.click()

    # The detail view is considered loaded once its heading contains the
    # first word of the title that was clicked.
    wait.until(EC.text_to_be_present_in_element(
        (By.CSS_SELECTOR, "#viewwrapCenter h2"), title))

    # find_element_by_css_selector was removed in Selenium 4.3; use the
    # locator-based find_element API instead.
    return driver.find_element(By.CSS_SELECTOR, "#viewwrapCenter").text
def main():
    """Fetch the page text with ``get_text`` and save it to ``OUTPUT_FILE_NAME``.

    The text is written as UTF-8 and any existing file is overwritten.
    """
    # Fetch first: if the crawl fails, the previous output file is left
    # untouched instead of being truncated to an empty file.
    result_text = get_text()

    # ``with`` closes the file even if the write fails. The explicit encoding
    # keeps non-ASCII page text from depending on the platform default.
    with open(OUTPUT_FILE_NAME, 'w', encoding='utf-8') as output_file:
        output_file.write(result_text)
# Run the crawl when the file is loaded, and always shut the browser down
# afterwards so no Chrome/chromedriver process is left behind, even when the
# crawl raises.
try:
    main()
finally:
    driver.quit()
Based on this code, I want to crawl this website: starting from the original URL, Selenium should open the 1st link, save its text to a .txt file, go back to the original URL, open the 2nd link, and keep going like that. The problem is that the CSS selector for the 1st link is `#viewHeightDiv > table > tbody > tr:nth-child(1) > td.s_tit > a`, while for the 2nd link it is `#viewHeightDiv > table > tbody > tr:nth-child(3) > td.s_tit > a`. The only difference between them is the number inside `nth-child(...)`, and it does not seem to follow any rule (it goes 1, 3, 5, 9, ...), so I'm stuck here.