I rewrote the code and now it works.
The reason it didn't work before is that I was trying to drop the code below into another, larger integrated script.
Maybe something went wrong during the merging.
It is hard to combine different function definitions together.
Thanks for the answers provided.
The code below works:
# import library
import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# default parameters
desktop_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop')
edge_driver_path = desktop_path + r"\msedgedriver.exe"
# page url
url = "https://www.aljazeera.com/economy/2023/2/6/who-is-gautam-adani-and-why-is-he-controversial"
# xpath
new_title = "//header[@class='article-header']/h1"
new_brief = "//header[@class='article-header']//em"
new_par01 = "//main[@id='main-content-area']/div[2]/p[1]"
new_par02 = "//main[@id='main-content-area']/div[2]/p[2]"
new_par03 = "//main[@id='main-content-area']/div[2]/p[3]"
new_par04 = "//main[@id='main-content-area']/div[2]/p[4]"
new_par05 = "//main[@id='main-content-area']/div[2]/p[5]"
new_par06 = "//main[@id='main-content-area']/div[2]/p[6]"
new_par07 = "//main[@id='main-content-area']/div[2]/p[7]"
new_par08 = "//main[@id='main-content-area']/div[2]/p[8]"
new_par09 = "//main[@id='main-content-area']/div[2]/p[9]"
new_par10 = "//main[@id='main-content-area']/div[2]/p[10]"
xpath_list = [new_title, new_brief,
new_par01, new_par02, new_par03, new_par04, new_par05,
new_par06, new_par07, new_par08, new_par09, new_par10]
def paragraph_scraping(url, xpath_list):
    """Scrape the text of the elements located by *xpath_list* from *url*.

    Opens the page in Microsoft Edge, scrolls the window around to trigger
    lazy-loaded content, extracts each element's text content, and returns
    the pieces joined with newlines.  XPaths that do not turn up within the
    wait window are silently skipped.

    Parameters:
        url: page address to open.
        xpath_list: iterable of XPath strings to extract text from.

    Returns:
        A single string with one extracted text per line.
    """
    # Start the Edge driver (driver path is a module-level constant).
    service = Service(edge_driver_path)
    driver = webdriver.Edge(service=service)
    try:
        # open url
        driver.get(url)

        # Manipulate the browser window to coax lazily-loaded content onto
        # the page before extraction.
        driver.set_window_size(1024, 600)
        driver.maximize_window()
        for offset in (1000, 500, 300):
            driver.execute_script(f"window.scrollTo(0, {offset})")
            time.sleep(0.5)
        driver.execute_script("window.scrollTo(0, 100)")
        time.sleep(1)

        # Collect the text of every locatable element.
        news_sentences = []
        # Hoist the wait object out of the loop — it is loop-invariant.
        wait = WebDriverWait(driver, 0.5)
        for xpath in xpath_list:
            try:
                element = wait.until(
                    EC.presence_of_element_located((By.XPATH, xpath)))
                news_sentences.append(element.get_attribute('textContent'))
            except TimeoutException:
                # Element never appeared within the wait window — skip it
                # (was a bare `except` that hid every other error too).
                pass
        # join sentences
        return "\n".join(news_sentences)
    finally:
        # Always release the browser, even when scraping fails — the
        # original leaked an Edge process per call.
        driver.quit()
if __name__ == "__main__":
    # Run the scraper only when executed as a script, so importing this
    # module does not launch a browser as a side effect.
    print(paragraph_scraping(url, xpath_list))