In the frist. I very want to thanks @cruisepandey was help me in this topic: How to crawl question and answer of Google People Also Ask with Selenium and Python?
So I was used his code like this:
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(30)
wait = WebDriverWait(driver, 30)
driver.get("https://www.google.com/search?q=How%20to%20make%20bakery%3F&source=hp&ei=j0aZYYjRAvja2roPrcWcyAU&iflsig=ALs-wAMAAAAAYZlUn4NMUPjfIpQmrXSmjIDnaWjJXWIJ&ved=0ahUKEwjI1JDn0Kf0AhV4rVYBHa0iB1kQ4dUDCAc&uact=5&oq=How%20to%20make%20bakery%3F&gs_lcp=Cgdnd3Mtd2l6EAMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBNQAFgAYJMDaABwAHgAgAF-iAF-kgEDMC4xmAEAoAECoAEB&sclient=gws-wiz")
all_questions = driver.find_elements(By.XPATH, "//span[text()='People also ask']/../following-sibling::div/descendant::div[@data-hveid and @class and @jsname and @data-ved]")
print(len(all_questions))
j = 1
for question in all_questions:
time.sleep(1)
ele = driver.find_element(By.XPATH, f"(//span[text()='People also ask']/../following-sibling::div/descendant::div[@data-hveid and @class and @jsname and @data-ved])[{j}]")
j = j + 2
ele.click()
time.sleep(1)
answer = ele.find_element(By.XPATH, ".//../following-sibling::div").get_attribute('innerText')
print(answer)
print('--------------')
This code very helpful. But I want to ask two question.
- When click to show answer. If not use time.sleep(1), I will use wait until answer show, how to get exactly class and code to wait answer show?
- Code will have problem if internet slowly. When click more result will not displays. I was try used wait until invisibility_of_element_located to load icon. But I catch Xpath not right. Have any way to do that? This is my code update from code of @cruisepandey:
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
driver.implicitly_wait(30)
wait = WebDriverWait(driver, 30)
driver.get("https://www.google.com/search?q=How%20to%20make%20bakery%3F&source=hp&ei=j0aZYYjRAvja2roPrcWcyAU&iflsig=ALs-wAMAAAAAYZlUn4NMUPjfIpQmrXSmjIDnaWjJXWIJ&ved=0ahUKEwjI1JDn0Kf0AhV4rVYBHa0iB1kQ4dUDCAc&uact=5&oq=How%20to%20make%20bakery%3F&gs_lcp=Cgdnd3Mtd2l6EAMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBMyBAgAEBNQAFgAYJMDaABwAHgAgAF-iAF-kgEDMC4xmAEAoAECoAEB&sclient=gws-wiz")
all_questions = driver.find_elements(By.XPATH, "//span[text()='People also ask']/../following-sibling::div/descendant::div[@data-hveid and @class and @jsname and @data-ved]")
print(len(all_questions))
j = 1
for question in all_questions:
time.sleep(1)
ele = driver.find_element(By.XPATH, f"(//span[text()='People also ask']/../following-sibling::div/descendant::div[@data-hveid and @class and @jsname and @data-ved])[{j}]")
j = j + 2
ele.click()
# Question 1: To waiting answer show.
timeout = 30
answer_css_class = 'wWOJcd'
try:
is_question_show = WebDriverWait(driver, timeout).until(
EC.presence_of_element_located((By.CLASS_NAME, answer_css_class))
)
except TimeoutException:
pass
time.sleep(1)
answer = ele.find_element(By.XPATH, ".//../following-sibling::div").get_attribute('innerText')
# Question 2: To waiting G loading icon when click more answer with slow internet.
loading_element_xpath = '/html/body/div[7]/div/div[9]/div[1]/div/div[2]/div[2]/div/div/div[2]/div/div/div[1]/g-loading-icon'
try:
is_question_show = WebDriverWait(driver, timeout).until(
EC.presence_of_element_located((By.XPATH, loading_element_xpath))
)
except TimeoutException:
pass
print(answer)
print('--------------')
So have any way to not use time.sleep and use wait until in selenium?