Yesterday I asked for help putting together a Python script to loop through folders, check the contents of each, and print out a report with some basic stats on the files in these folders. Martin Prikryl pointed me in the direction of some code he developed a couple months back. I tried it and didn't get any errors, but didn't get any results either. Here is the code.
from ftplib import *
global ftp
import ftplib
import io
from io import StringIO
import string
import pandas as pd
from pandas.compat import StringIO
from collections import Counter
from ssl import SSLSocket
class FtpFile:
    """Seekable, read-only file-like view of a single file on an FTP server.

    Each ``read`` opens a fresh data connection that starts at the current
    position (the position is passed to ``transfercmd`` as its ``rest``
    argument) and stops as soon as enough bytes have arrived, so a consumer
    that needs only part of a large remote file does not download all of it.

    Attributes:
        ftp: connection object used for every command; it must provide
            ``size``, ``voidcmd``, ``transfercmd`` and ``voidresp``.
        name: remote path used in the ``RETR`` command.
        size: value returned by ``ftp.size(name)`` at construction time.
        pos: current read offset in bytes.
    """

    def __init__(self, ftp, name):
        """Remember the connection and remote name and query the file size."""
        self.ftp = ftp
        self.name = name
        self.size = ftp.size(name)
        self.pos = 0

    def seekable(self):
        """Report that this object supports ``seek`` and ``tell``."""
        return True

    def seek(self, offset, whence=0):
        """Move the read position and return the new absolute offset.

        Args:
            offset: byte offset, interpreted according to ``whence``.
            whence: 0 = from the start (default), 1 = relative to the
                current position, 2 = relative to the end of the file.
                Any other value leaves the position unchanged.
        """
        if whence == 0:
            self.pos = offset
        elif whence == 1:
            self.pos += offset
        elif whence == 2:
            self.pos = self.size + offset
        print("seek {}".format(self.pos))
        return self.pos

    def tell(self):
        """Return the current read position."""
        print("tell {}".format(self.pos))
        return self.pos

    def read(self, size=None):
        """Read up to ``size`` bytes from the current position.

        Args:
            size: maximum number of bytes to return; ``None`` or a negative
                value means "everything up to the end of the file".

        Returns:
            The bytes received (``b""`` at end of file). The position is
            advanced by the number of bytes returned.

        Raises:
            ftplib.error_temp: if the server reports a transient failure
                for a transfer that was read through to the end of the file.
        """
        remaining = max(self.size - self.pos, 0)
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)
        print("read {}".format(size))
        if size == 0:
            # Nothing to fetch: do not open a data connection at all.
            print("read {}".format(0))
            return b""

        # Based on FTP.retrbinary, but allows stopping after a given number
        # of bytes instead of always draining the whole file.
        self.ftp.voidcmd('TYPE I')
        cmd = "RETR {}".format(self.name)
        conn = self.ftp.transfercmd(cmd, self.pos)
        chunks = []
        received = 0
        try:
            while received < size:
                buf = conn.recv(min(size - received, 8192))
                if not buf:
                    break
                chunks.append(buf)
                received += len(buf)
            # Shut down the SSL layer (only relevant when using TLS/SSL).
            if SSLSocket is not None and isinstance(conn, SSLSocket):
                conn.unwrap()
        finally:
            conn.close()

        # When we close the data connection before the file is exhausted the
        # server typically answers with a 4xx "transfer aborted" reply; that
        # is the expected outcome of a deliberate partial read, not an error.
        stopped_early = self.pos + received < self.size
        try:
            self.ftp.voidresp()
        except ftplib.error_temp:
            if not stopped_early:
                raise

        data = b"".join(chunks)
        self.pos += received
        print("read {}".format(len(data)))
        return data
# And then you can use it like:
# Imported here so this snippet is self-contained; both are standard library.
import zipfile
from io import BytesIO

ftp = FTP(portal, user_name, password)
ftp.cwd('/emm/')  # folder that I'm trying to query

# retrbinary hands the callback raw bytes, so the sink must be a bytes buffer
# (a text StringIO rejects bytes with TypeError).
zipstring = BytesIO()
print(zipstring)

# NOTE(review): this looks like a path on the local Windows machine, but the
# SIZE and RETR commands below are resolved by the FTP server - confirm that
# this should instead be the archive's path on the server.
name = "C:/Users/ryans/OneDrive/Desktop/archive.zip"
print(name)

size = ftp.size(name)  # may be None if the server does not report a size
print(size)

# Only the tail of the archive is requested, which is where a ZIP file keeps
# its central directory. Clamp the restart offset so archives smaller than
# the tail window do not produce a negative REST value; with an unknown size,
# fall back to downloading the whole file.
rest = max(size - 1000*2024, 0) if size is not None else None
ftp.retrbinary("RETR " + name, zipstring.write, rest=rest)

# The name `zip` is kept from the original script; note that it shadows the
# builtin zip() for the rest of the module.
zip = zipfile.ZipFile(zipstring)
print(zip.namelist())
I would expect the results to get printed out somewhere, either in a text file or a CSV file, but I don't see anything printed out. Also, the code runs very, very slowly, and never actually finishes. Again, I don't see any results anywhere. The FTP portal that I'm looking at is around 7.6GB and it has 705 folders and files. I would like to get file names, dates when files were added/changed, size of each file, and if possible, record count in each file. Maybe the last thing is too hard to do. I would think the other things are doable.