Sorry for posting such a large block of code here; I am not sure where the real problem is. I want to transfer data from multiple CSV files compressed inside a tar.gz archive to SQL Server, with each CSV file becoming its own table.
import os, gzip, tarfile
import pandas
from sqlalchemy import create_engine

engine = create_engine('mssql+pyodbc:...This is correct, I checked!')
infile = "mysql-2016-09-05.tar.gz"

def linecount(infile, member):
    # count the lines of one archive member so it can be read in chunks
    lc = 0
    with gzip.GzipFile(infile) as zipf:
        with tarfile.TarFile(fileobj=zipf) as tarf:
            dataf = tarf.extractfile(member)
            while dataf.readline():
                lc += 1
            dataf.close()
    return lc

chunksize = 10000
os.chdir("from file path: This is correct, I checked!")
tar = tarfile.open(infile)
for member in tar.getnames():
    if member.endswith("csv"):
        nlines = linecount(infile, member)
        # read the CSV chunk by chunk and append each chunk to its table
        for i in range(0, nlines, chunksize):
            df = pandas.read_csv(member, sep=',', nrows=chunksize, skiprows=i)
            filename = member.split("/")[1]
            df.to_sql(filename, engine, if_exists='append',
                      index=False, index_label=None, chunksize=chunksize)
    else:
        continue
tar.close()
engine.dispose()
I get the error:
Traceback (most recent call last):
File "tar3.py", line 26, in <module>
df = pandas.read_csv(member,sep=',', nrows=chunksize, skiprows=i)
.
.
IOError: File mysql-2016-09-05/commit_comments.csv does not exist
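From reading the tarfile docs, my suspicion is that pandas.read_csv is being handed the member's path inside the archive (mysql-2016-09-05/commit_comments.csv), which never exists on disk because nothing is extracted before the read. If that is the cause, a sketch along these lines, passing the file-like object returned by tar.extractfile() straight to read_csv and letting read_csv's own chunksize argument replace my linecount helper, is what I would try (it reuses infile, chunksize, and engine from above; the table-name split is copied from my code):

tar = tarfile.open(infile)
for member in tar.getmembers():
    if member.name.endswith("csv"):
        # extractfile returns a file-like object read straight out of the
        # archive, so the CSV never has to exist on disk
        fileobj = tar.extractfile(member)
        tablename = member.name.split("/")[1]
        # chunksize makes read_csv yield DataFrames of that many rows,
        # so the manual nrows/skiprows bookkeeping goes away
        for chunk in pandas.read_csv(fileobj, sep=',', chunksize=chunksize):
            chunk.to_sql(tablename, engine, if_exists='append', index=False)
tar.close()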
Is this the right direction, or is something else going on? Please advise how to fix this. Thanks!