I have a Python 3 script that I run on both Windows and OS X, but it hangs after showing one chart. I am also wondering whether I can make the Yahoo scraping process any faster.
import time
import urllib
import urllib.request
import Quandl
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Yahoo key-statistics page; the ticker symbol is appended to this URL.
_KEY_STATS_URL = 'http://au.finance.yahoo.com/q/ks?s='

# Without a timeout a stalled connection would block the whole scan forever.
_REQUEST_TIMEOUT_SECONDS = 10

# HTML that immediately precedes each value in the key-statistics table.
_PRICE_BOOK_MARKER = 'Price/Book (mrq):</td><td class="yfnc_tabledata1">'
_PEG_5YR_MARKER = ('PEG Ratio (5 yr expected)<font size="-1"><sup>1</sup>'
                   '</font>:</td><td class="yfnc_tabledata1">')
_TRAILING_PE_MARKER = ('Trailing P/E (ttm, intraday):</td>'
                       '<td class="yfnc_tabledata1">')


def _extract_stat(page, marker):
    """Return the text between *marker* and the next ``</td>`` in *page*.

    Raises IndexError when *marker* does not occur in *page*.
    """
    return page.split(marker)[1].split('</td>')[0]


def _fetch_fundamental(stock, suffix):
    """Fetch the ``DMDRN/<stock><suffix>`` dataset from Quandl for 2002-2012."""
    return Quandl.get("DMDRN/" + stock + suffix,
                      trim_start="2002-12-31", trim_end="2012-12-31")


def _plot_fundamentals(stock, net_income, revenue, roc):
    """Draw the three series as stacked subplots in a new, non-blocking figure."""
    # A fresh figure per stock keeps charts from being drawn on top of each
    # other now that showing a chart no longer waits for the window to close.
    plt.figure()

    plt.subplot(3, 1, 1)
    plt.title(stock)
    plt.ylabel("Net Income")
    plt.plot(net_income.index, net_income)

    plt.subplot(3, 1, 2)
    plt.ylabel("Revenue")
    plt.plot(revenue.index, revenue)

    plt.subplot(3, 1, 3)
    plt.ylabel("Return on Capital")
    plt.xlabel("year")
    plt.plot(roc.index, roc)

    # A blocking plt.show() here stalls the scan until the window is closed,
    # which looks like a hang after the first chart. Show without blocking and
    # give the GUI event loop a moment to actually draw the window.
    plt.show(block=False)
    plt.pause(0.001)


def yahooKeyStats(stock):
    """Screen *stock* on Yahoo key statistics and chart it if it passes.

    The stock passes when its price/book ratio is below 2 and its 5-year
    expected PEG ratio is strictly between 0 and 2. A passing stock is
    appended to the module-level ``ofInterest`` list (defined outside this
    block), its ratios are printed, and its net income, revenue and return
    on capital series are fetched from Quandl, printed and plotted.

    Charts are displayed without blocking; call ``plt.show()`` once after all
    stocks have been processed to keep the windows open.

    Args:
        stock: Ticker symbol appended to the Yahoo URL and used in the
            Quandl dataset codes.

    Returns:
        The ``ofInterest`` list when no error occurred, otherwise ``None``.
        Any exception (network failure, missing table cell, non-numeric
        value such as "N/A", Quandl error) is reported on stdout and
        swallowed so that one bad ticker does not abort a scan.
    """
    try:
        # Close the connection deterministically and decode once, instead of
        # parsing the repr of the raw bytes on every lookup.
        with urllib.request.urlopen(_KEY_STATS_URL + stock,
                                    timeout=_REQUEST_TIMEOUT_SECONDS) as response:
            page = response.read().decode('utf-8', errors='replace')

        pbr = _extract_stat(page, _PRICE_BOOK_MARKER)
        if not float(pbr) < 2.00:
            return ofInterest

        peg5 = _extract_stat(page, _PEG_5YR_MARKER)
        if not 0 < float(peg5) < 2:
            return ofInterest

        pe12t = _extract_stat(page, _TRAILING_PE_MARKER)
        ofInterest.append(stock)
        print('______________________________\n' +
              'stock meets requirements ' + stock + '\n' +
              'price book ratio ' + pbr + '\n' +
              'price earnings growth 5 years ' + peg5 + '\n' +
              'trailing PE 12 months ' + pe12t + '\n' +
              '______________________________')

        net_income = _fetch_fundamental(stock, "_NET_INC")
        revenue = _fetch_fundamental(stock, "_REV_LAST")
        roc = _fetch_fundamental(stock, "_ROC")
        print(net_income, revenue, roc)

        _plot_fundamentals(stock, net_income, revenue, roc)
        return ofInterest
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and move on to
        # the next ticker.
        print('failed in the main loop', str(e))


for eachStock in sp500u:
    yahooKeyStats(eachStock)

# Charts were shown without blocking during the scan; block once here so the
# windows stay open until the user closes them.
plt.show()