I was trying to collect some data from web programmatically for 6000 stocks, i used Python 3.6 selenium webdriver Firefox. [I intended to use BeautifulSoup to parse the HTML but it seems every-time when I update the web, the link doesn't change, soup doesn't cope with Javascript]
Anyway, When I create a for loop to do this, a specific row in my code, share_price = driver.find_element_by_css_selector(".highcharts-root > g:nth-child(25) > text:nth-child(2)")
, goes wrong most of the time (It worked a couple times though, So i believe my code is good). However, it works fine if I did it manually (copy and paste into Python IDLE and run it). I tried to use time.sleep(4)
to allow web to load before I salvage anything from background, but it seems this is not the solution. Now I'm running out of hint. Can anyone help me unravel this.
Below is my code:
from selenium import webdriver
import time
import pyautogui
filename = "historical_price_marketcap.csv"
f = open(filename,"w")
headers = "stock_ticker, share_price, market_cap\n"
f.write(headers)
driver = webdriver.Firefox()
def get_web():
driver.get("https://stockrow.com")
import csv
with open("TICKER.csv") as file:
read = csv.reader(file)
TICKER=[]
for row in read:
ticker = row[0][1:-1]
TICKER.append(ticker)
for Ticker in range(len(TICKER)):
get_web()
time.sleep(3)
pyautogui.click(425, 337)
pyautogui.typewrite(TICKER[Ticker],0.25)
time.sleep(2)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(268, 337)
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite('Stock Price',0.25)
time.sleep(2)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(702, 427)
for i in range(int(10)):
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite("2013-12-01",0.25)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(882, 425)
for k in range(10):
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite("2013-12-31",0.25)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(1317, 318)
for j in range(3):
pyautogui.press("down")
time.sleep(10)
share_price = driver.find_element_by_css_selector(".highcharts-root > g:nth-child(25) > text:nth-child(2)")
get_web()
time.sleep(3)
pyautogui.click(425, 337)
pyautogui.typewrite(TICKER[Ticker],0.25)
time.sleep(2)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(268, 337)
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite('Market Cap',0.25)
time.sleep(2)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(702, 427)
for i in range(int(10)):
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite("2013-12-01",0.25)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(882, 425)
for k in range(10):
pyautogui.press("backspace")
time.sleep(2)
pyautogui.typewrite("2013-12-31",0.25)
pyautogui.press("enter")
time.sleep(2)
pyautogui.click(1317, 318)
for j in range(3):
pyautogui.press("down")
time.sleep(10)
market_cap = driver.find_element_by_css_selector(".highcharts-root > g:nth-child(28) > text:nth-child(2)")
f.close()
it seems that the two lines that is bugging me is share_price = driver.find_element_by_css_selector(".highcharts-root > g:nth-child(25) > text:nth-child(2)")
Here is the error message from Python:
Traceback (most recent call last):
File "C:\Users\HENGBIN\Desktop\get_historical_data.py", line 65, in <module>
share_price = driver.find_element_by_css_selector(".highcharts-root > g:nth-child(25) > text:nth-child(2)")
File "E:\Program Files\python3.6.1\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 457, in find_element_by_css_selector
return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
File "E:\Program Files\python3.6.1\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 791, in find_element
'value': value})['value']
File "E:\Program Files\python3.6.1\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 256, in execute
self.error_handler.check_response(response)
File "E:\Program Files\python3.6.1\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: .highcharts-root > g:nth-child(25) > text:nth-child(2)
It doesn't work most of the time in loop but works fine if I run it manually in Python IDLE. I don't know what is going on.........