I have many CSV files in a directory, and I want to read each of them with pandas.read_csv and then merge the resulting DataFrames with pandas.concat.
I tried asyncio, but I don't think I am using it properly, because the total time did not go down.
import asyncio
import glob2
import os
import time

import pandas as pd

async def read_csv(filename):
    # pd.read_csv is a blocking call, so this coroutine never
    # yields control back to the event loop while it reads
    df = pd.read_csv(filename, header=None)
    return df

t = time.time()
path = r'C:\LRM_STGY_REPO\IB_IN'
tasks = [asyncio.ensure_future(read_csv(f))
         for f in glob2.iglob(os.path.join(path, "*.txt"))]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
df = pd.concat([task.result() for task in tasks], ignore_index=True)
# print(df)
print('%.4f' % (time.time() - t))
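From what I understand, pd.read_csv blocks, so the coroutine above runs to completion before the next one starts and the event loop never overlaps any work. If that is right, one way to actually overlap the reads would be to push each blocking call into the loop's default thread pool with run_in_executor. A minimal, untested sketch (read_one and read_all are just names I made up; even then, much of pandas' parsing holds the GIL, so threads may not help a lot):

import functools

async def read_one(loop, filename):
    # run the blocking pd.read_csv in the loop's default ThreadPoolExecutor;
    # functools.partial is needed to pass the header=None keyword argument
    func = functools.partial(pd.read_csv, filename, header=None)
    return await loop.run_in_executor(None, func)

async def read_all(filenames):
    loop = asyncio.get_event_loop()
    # gather returns the frames in the same order as filenames
    return await asyncio.gather(*(read_one(loop, f) for f in filenames))

t = time.time()
filenames = list(glob2.iglob(os.path.join(path, "*.txt")))
loop = asyncio.get_event_loop()
frames = loop.run_until_complete(read_all(filenames))
df = pd.concat(frames, ignore_index=True)
print('%.4f' % (time.time() - t))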
t = time.time()

def read_csv2(filename):
    # plain synchronous version, for comparison
    return pd.read_csv(filename, header=None)

df = pd.concat(map(read_csv2, glob2.iglob(os.path.join(path, "*.txt"))),
               ignore_index=True)
# print(df)
print('%.4f' % (time.time() - t))
The async read_csv and the plain read_csv2 take roughly the same time.
Am I using asyncio incorrectly, or are there other ways to reduce the read and concat time?
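For comparison, here is the same job with a plain concurrent.futures thread pool instead of asyncio, which seems to be the simpler way to run blocking calls like pd.read_csv concurrently. max_workers=8 is an arbitrary choice of mine, and the speed-up still depends on how much of the parse releases the GIL. As for concat itself: collecting all frames into one list and calling pd.concat once, as both versions above already do, is cheaper than concatenating repeatedly in a loop.

from concurrent.futures import ThreadPoolExecutor

t = time.time()
filenames = list(glob2.iglob(os.path.join(path, "*.txt")))
with ThreadPoolExecutor(max_workers=8) as pool:  # worker count is a guess
    # pool.map keeps the results in the same order as filenames
    frames = list(pool.map(read_csv2, filenames))
df = pd.concat(frames, ignore_index=True)  # single concat over the whole list
print('%.4f' % (time.time() - t))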