I am trying to write a Selenium scraper that extracts the user handles of the given tweets, but eventually the program hangs and the browser shows "Aw, Snap! Something went wrong while displaying this webpage. Error code: Out of Memory".
I tried Chrome and Edge; it makes no difference. I also tried different amounts of RAM (the program is running inside a Hyper-V VM): with 8 GB of RAM it takes 15 minutes (sometimes 20) to get this error; with 16 GB it always crashes between 28 and 29 minutes. The amount of RAM consumed by the browser tab at the time of the error is always lower than the maximum it consumed in the preceding minutes. According to Task Manager, the memory used by all processes doesn't go above 85%.
How should I troubleshoot this issue? It doesn't seem like a browser problem. I know my code doesn't work perfectly; I am focusing only on this error right now.
import time
from datetime import datetime

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager
def twitter_login(driver):
    """Open the Twitter login page and submit three values in sequence.

    The first value is typed into the element whose ``autocomplete``
    attribute is ``"username"``; the next two are typed into whichever
    element has focus after the previous submission.

    Args:
        driver: The Selenium WebDriver instance used to drive the browser.
    """

    def type_and_submit(field, value):
        # Fixed pauses around each keystroke batch; presumably they give
        # the page time to react before the next interaction.
        time.sleep(1)
        field.send_keys(value)
        time.sleep(1)
        field.send_keys(Keys.RETURN)
        time.sleep(4)

    driver.get("https://twitter.com/login")
    time.sleep(10)

    type_and_submit(
        driver.find_element_by_xpath('//*[@autocomplete="username"]'),
        "USERHANDLE",
    )
    # After each submission the next input is taken from the focused element.
    for value in ("EMAIL_ADDRESS", "PASSWORD"):
        type_and_submit(driver.switch_to.active_element, value)
def twitter_find(driver, text):
    """Search for ``#<text>`` and click the "Latest" link.

    Args:
        driver: The Selenium WebDriver instance used to drive the browser.
        text: The search term; it is typed after a leading ``#``.
    """
    time.sleep(4)
    search_box = driver.find_element_by_xpath(
        '//input[@aria-label="Search query"]'
    )

    # Each entry is (keys to send in one call, seconds to pause afterwards):
    # select all, delete the selection, type the hashtag, submit.
    steps = (
        ((Keys.CONTROL + "a",), 1),
        ((Keys.DELETE,), 1),
        (("#", text), 1),
        ((Keys.RETURN,), 4),
    )
    for keys, pause in steps:
        search_box.send_keys(*keys)
        time.sleep(pause)

    driver.find_element_by_link_text("Latest").click()
    time.sleep(4)
# Script body: log in, search for a hashtag, then keep scrolling the results
# and print the handle found in each of the most recently located tweets.
old_position = 0
# NOTE(review): UTCtime is not read anywhere in the visible code; it is kept
# in case another part of the file relies on it.
UTCtime = datetime.utcnow().replace(microsecond=0)
start_time = datetime.utcnow()

driver = webdriver.Edge(EdgeChromiumDriverManager().install())
#driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.set_window_size(1366, 768)
    twitter_login(driver)
    twitter_find(driver, "bitcoin")

    while True:
        # Only the last ten located tweets are inspected on each pass.
        cards = driver.find_elements_by_xpath('//*[@data-testid="tweet"]')[-10:]
        for card in cards:
            try:
                userhandle = card.find_element_by_xpath(
                    './/span[contains(text(), "@")]'
                ).text
            except (NoSuchElementException, StaleElementReferenceException):
                # No "@" span in this card, or the card is no longer attached
                # to the page. Skip it so that an unbound or stale handle is
                # never printed for this card.
                continue
            print("Time: ", (datetime.utcnow() - start_time))
            print(userhandle, "\n")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        position = driver.execute_script("return document.body.scrollHeight")
        if position == old_position:
            # The page height did not change since the previous pass: scroll
            # up and back down in growing steps (presumably to trigger the
            # loading of more tweets).
            for i in range(1, 250, 10):
                driver.execute_script("window.scrollBy(0, {});".format(-i))
            time.sleep(1)
            for i in range(1, 250, 10):
                driver.execute_script("window.scrollBy(0, {});".format(i))
            time.sleep(2)
        old_position = position
finally:
    # The loop above never ends on its own, so the browser is closed here
    # when the script is interrupted or an error propagates.
    driver.quit()