As a quick example, let's say I'd like to implement code that crawls stock price data from a specific site. In the normal (synchronous) way, the code would look like this:
import bs4
from requests_html import AsyncHTMLSession, HTMLSession
def get_response(site_url):
    """Fetch *site_url* with a fresh blocking ``HTMLSession``; return the response."""
    return HTMLSession().get(site_url)
def get_data_list(site_url, past_date):
    """Download the page at *site_url* and collect its price records.

    Every ``item`` element selected from the response is expected to carry
    a ``data`` attribute made of exactly six ``|``-separated fields:
    date/time, open, high, low, close and volume. Records are gathered in
    document order, and gathering stops right after the record whose
    date/time equals *past_date* (that record is included).

    Returns:
        A list of dicts keyed by ``date_time``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``; values are the raw strings
        taken from the ``data`` attribute.

    Raises:
        ValueError: if a ``data`` attribute does not split into six fields.
        AttributeError: if an ``item`` element has no ``data`` attribute.
    """
    res = get_response(site_url)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    price_dict_list = []
    for item_element in soup.select('item'):
        # Locals carry a ``_price`` suffix so the builtin ``open()`` is not
        # shadowed inside this function; the output keys stay the same.
        (
            date_time,
            open_price,
            high_price,
            low_price,
            close_price,
            volume,
        ) = item_element.get('data').split('|')
        price_dict_list.append(
            {
                'date_time': date_time,
                'open': open_price,
                'high': high_price,
                'low': low_price,
                'close': close_price,
                'volume': volume,
            }
        )
        # The matching record is appended before stopping, so it is included.
        if date_time == past_date:
            break
    return price_dict_list
But when I want to implement it in an async way while keeping the synchronous code as well, additional code has to be ADDED, like this:
import bs4
from requests_html import AsyncHTMLSession, HTMLSession
def get_response(site_url):
    """Fetch *site_url* with a fresh blocking ``HTMLSession``; return the response."""
    return HTMLSession().get(site_url)
def get_data_list(site_url, past_date):
    """Download the page at *site_url* and collect its price records.

    Every ``item`` element selected from the response is expected to carry
    a ``data`` attribute made of exactly six ``|``-separated fields:
    date/time, open, high, low, close and volume. Records are gathered in
    document order, and gathering stops right after the record whose
    date/time equals *past_date* (that record is included).

    Returns:
        A list of dicts keyed by ``date_time``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``; values are the raw strings
        taken from the ``data`` attribute.

    Raises:
        ValueError: if a ``data`` attribute does not split into six fields.
        AttributeError: if an ``item`` element has no ``data`` attribute.
    """
    res = get_response(site_url)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    price_dict_list = []
    for item_element in soup.select('item'):
        # Locals carry a ``_price`` suffix so the builtin ``open()`` is not
        # shadowed inside this function; the output keys stay the same.
        (
            date_time,
            open_price,
            high_price,
            low_price,
            close_price,
            volume,
        ) = item_element.get('data').split('|')
        price_dict_list.append(
            {
                'date_time': date_time,
                'open': open_price,
                'high': high_price,
                'low': low_price,
                'close': close_price,
                'volume': volume,
            }
        )
        # The matching record is appended before stopping, so it is included.
        if date_time == past_date:
            break
    return price_dict_list
async def get_response2(site_url):
    """Fetch *site_url* with a fresh ``AsyncHTMLSession``; return the awaited response."""
    return await AsyncHTMLSession().get(site_url)
async def get_data_list2(site_url, past_date):
    """Asynchronously download *site_url* and collect its price records.

    Coroutine counterpart of the synchronous listing: the response is
    obtained by awaiting ``get_response2``. The function must be declared
    with ``async def`` because ``await`` is a syntax error inside a plain
    ``def``.

    Every ``item`` element selected from the response is expected to carry
    a ``data`` attribute made of exactly six ``|``-separated fields:
    date/time, open, high, low, close and volume. Records are gathered in
    document order, and gathering stops right after the record whose
    date/time equals *past_date* (that record is included).

    Returns:
        A list of dicts keyed by ``date_time``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``; values are the raw strings
        taken from the ``data`` attribute.

    Raises:
        ValueError: if a ``data`` attribute does not split into six fields.
        AttributeError: if an ``item`` element has no ``data`` attribute.
    """
    res = await get_response2(site_url)
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    price_dict_list = []
    for item_element in soup.select('item'):
        # Locals carry a ``_price`` suffix so the builtin ``open()`` is not
        # shadowed inside this function; the output keys stay the same.
        (
            date_time,
            open_price,
            high_price,
            low_price,
            close_price,
            volume,
        ) = item_element.get('data').split('|')
        price_dict_list.append(
            {
                'date_time': date_time,
                'open': open_price,
                'high': high_price,
                'low': low_price,
                'close': close_price,
                'volume': volume,
            }
        )
        # The matching record is appended before stopping, so it is included.
        if date_time == past_date:
            break
    return price_dict_list
But as you can see, these two functions contain duplicated code except for the part that creates a Session. What annoys me is that every time I implement the code in both ways, I need to write TWO functions, and these functions always have duplicated logic.
What I want is a single interface like this:
- When testing in the IPython shell (or during development): call
get_data_list()
- In production: call
await get_data_list()
I know this is going to be hard to implement, considering how asyncio works, but removing the duplicated logic would be enough for me.
Is there a way to write this code efficiently in this situation?