0

How would I be able to continuously and automatically scrape from a website? For example, get the trending quotes every 6 hours from the TSX website and update my JSON file accordingly. In other words, how can I continuously execute my Python script every 6 hours automatically?

The reason behind this: I will be using my JSON file on my own personal website to output the data, styled in HTML and CSS. So whenever someone comes to my site, the content will have updated since their previous visit, if the TSX trending quotes have changed.

# grabs all the trending quotes for that day
# grabs all the trending quotes for that day
def getTrendingQuotes(browser):
    """Return the href of every trending-quote link on the current page.

    Waits up to 10 seconds for the links to appear (explicit wait is
    illustrative here; the elements are usually present immediately).
    """
    anchors = WebDriverWait(browser, 10).until(
        lambda drv: drv.find_elements_by_css_selector('#trendingQuotes a')
    )
    hrefs = []
    for anchor in anchors:
        hrefs.append(anchor.get_attribute('href'))
    return hrefs


def getStockDetails(url, browser):
    """Open one quote page, print its details, and record them via convertToJson."""
    print(url)
    browser.get(url)

    # All fields live inside the page's quote-wrapper container.
    wrapper = browser.find_element_by_css_selector('div.quote-wrapper')
    name_container = wrapper.find_element_by_class_name("quote-name")
    quote_name = name_container.find_element_by_tag_name('h2').text
    quote_price = wrapper.find_element_by_class_name("quote-price").text
    quote_volume = wrapper.find_element_by_class_name("quote-volume").text

    print("\n")
    print("Quote Name: " + quote_name)
    print("Quote Price: " + quote_price)
    print("Quote Volume: " + quote_volume)
    print("\n")

    convertToJson(quote_name, quote_price, quote_volume, url)


# Accumulates one dict per scraped quote; dumped to disk by trendingBot.
quotesArr = []

# Build the JSON-ready record for one quote
def convertToJson(quote_name, quote_price, quote_volume, url):
    """Append a single quote record to the module-level quotesArr list."""
    quotesArr.append({
        "url": url,
        "Name": quote_name,
        "Price": quote_price,
        "Volume": quote_volume,
    })


def trendingBot(url, browser):
    """Scrape every trending quote reachable from *url* and write the results
    to trendingQuoteData.json.

    Bug fix: quotesArr is a module-level list, so without resetting it each
    call the file would accumulate duplicate records when this script is
    re-run on a schedule (e.g. every 6 hours). Clear it before scraping.
    """
    quotesArr.clear()  # reset between runs; previously records piled up
    browser.get(url)
    trending = getTrendingQuotes(browser)
    for trend in trending:
        getStockDetails(trend, browser)
    # requests finished, write json to file
    with open('trendingQuoteData.json', 'w') as outfile:
        json.dump(quotesArr, outfile)


def Main():
    """Entry point: launch headless Chrome, run the scraper, and always
    release the browser.

    Bug fix: browser.quit() previously ran only on the success path, so any
    exception during scraping leaked the Chrome/chromedriver processes —
    fatal for a script meant to run unattended every 6 hours. The quit is
    now in a finally block.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # applicable to windows os only
    chrome_options.add_argument('--disable-gpu')

    url = 'https://www.tmxmoney.com/en/index.html'
    browser = webdriver.Chrome(
        r"C:\Users\austi\OneDrive\Desktop\chromeDriver\chromedriver_win32\chromedriver.exe",
        chrome_options=chrome_options)
    try:
        browser.get(url)

        os.system('cls')  # clear the console (Windows-only command)
        print("[+] Success! Bot Starting!")
        trendingBot(url, browser)
    finally:
        # ensure the Chrome process is released even if scraping raised
        browser.quit()


# Run the scraper only when this file is executed directly (not imported).
if __name__ == "__main__":
    Main()
pennyBoy
  • 397
  • 2
  • 17
  • 2
    Sounds like you just need a shell script / there's nothing wrong with your python file. Which OS? – Hatt Dec 10 '18 at 20:20
  • Windows. What do you mean by a shell script? – pennyBoy Dec 10 '18 at 20:20
  • As in scheduling your system to execute your .py file as needed. I usually use CRON within linux, but windows task scheduler should help. – Hatt Dec 10 '18 at 20:26
  • 1
    This may be more helpful than my other link - https://stackoverflow.com/q/2725754/1313067 . I didn't downvote but it's probably because your question/content isn't really related to python/webscraping, rather scripting. – Hatt Dec 10 '18 at 20:33
  • Ok, thanks. Doesn't the task scheduler just run locally, though? I won't be able to use this if I host a website with my script, right? – pennyBoy Dec 10 '18 at 20:36

0 Answers