I'm trying to use selenium in google colab to scrape the comments of YouTube video, but the script call the exception. When I run that same script on my local machine it works. The YouTube video website is dynamic, and I find that the return page source content doesn't return the full content on colab. However, I don't know how to figure it out.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
options = Options()
prefs = {"profile.managed_default_content_settings.images": 2}
options.add_experimental_option("prefs", prefs)
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--window-size=2560x1440")
options.add_argument("start-maximised")
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chrome_options=options)
driver.get('https://www.youtube.com/watch?v=yIYKR4sgzI8')
time.sleep(10)
driver.execute_script('window.scrollTo(1, 500);')
time.sleep(10)
# views_div = driver.find_element_by_xpath('//*[@id="info-contents"]')
# views = views_div.find_element_by_xpath('//*[@class="view-count style-scope yt-view-count-renderer"]')
comment_div=driver.find_element_by_xpath('//*[@id="contents"]')
comments=comment_div.find_elements_by_xpath('//*[@id="content-text"]')
# titles_div = driver.find_element_by_xpath('//*[@class="detail-content"]')
# titles = titles_div.find_element_by_xpath('//*[@class="title"]')
# print(driver.page_source)
for comment in comments:
print(comment.text)
driver.close()
Then, I get this exception.
NoSuchElementException Traceback (most recent call last)
<ipython-input-20-1833cad42017> in <module>()
26 # views = views_div.find_element_by_xpath('//*[@class="view-count style-scope yt-view-count-renderer"]')
27
---> 28 comment_div=driver.find_element_by_xpath('//*[@id="contents"]')
29 comments=comment_div.find_elements_by_xpath('//*[@id="content-text"]')
30
3 frames
/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//*[@id="contents"]"}
(Session info: headless chrome=80.0.3987.87)