I have the following program to scrap data from a website. I want to improve the below code by using a generator with a yield instead of calling generate_url
and call_me
multiple times sequentially. The purpose of this exersise is to properly understand yield
and the context in which it can be used.
import requests
import shutil
start_date='03-03-1997'
end_date='10-04-2015'
yf_base_url ='http://real-chart.finance.yahoo.com/table.csv?s=%5E'
index_list = ['BSESN','NSEI']
def generate_url(index, start_date, end_date):
s_day = start_date.split('-')[0]
s_month = start_date.split('-')[1]
s_year = start_date.split('-')[2]
e_day = end_date.split('-')[0]
e_month = end_date.split('-')[1]
e_year = end_date.split('-')[2]
if (index == 'BSESN') or (index == 'NSEI'):
url = yf_base_url + index + '&a={}&b={}&c={}&d={}&e={}&f={}'.format(s_day,s_month,s_year,e_day,e_month,e_year)
return url
def callme(url,index):
print('URL {}'.format(url))
r = requests.get(url, verify=False,stream=True)
if r.status_code!=200:
print "Failure!!"
exit()
else:
r.raw.decode_content = True
with open(index + "file.csv", 'wb') as f:
shutil.copyfileobj(r.raw, f)
print "Success"
if __name__ == '__main__':
url = generate_url(index_list[0],start_date,end_date)
callme(url,index_list[0])
url = generate_url(index_list[1],start_date,end_date)
callme(url,index_list[1])