I'm new to Python and trying to learn. I have a Python script that unzips each compressed (zipped) folder in a source directory and extracts everything to a destination folder. In addition, I want to delete each source archive once it has been extracted. How would I achieve this? Thanks in advance for your help!
Basically, inside the path "L:\Python\Source Zipped files" I have multiple zipped folders. My script unzips each folder and extracts its contents to the final destination. I'm looking for an approach where, once the first folder has been unzipped and extracted, it is deleted from the source folder, and likewise for each folder after it. I have included a snippet showing what the source folder looks like.
Here is my script:
import os
import zipfile
import shutil
import json
# Batch-unpack nested zip archives.
#
# Every entry in data_dir is opened as a zip archive and extracted into
# temp_dir.  Every entry that lands in temp_dir is in turn opened as a zip
# archive and extracted into new_dir, after which GENERIC_ROUGHDRAFT.xml and
# XACTDOC.xml are copied from new_dir into final_dir under sequentially
# numbered names.
#
# NOTE(review): the original paste had lost its indentation; the nesting
# below is reconstructed from the statement order -- confirm it matches the
# intended control flow.
# NOTE(review): `logging`, `time` and `my_time()` are used here but are not
# imported/defined in the visible excerpt -- presumably provided elsewhere
# in the file; confirm.
data_dir = r'L:\Python\Source Zipped files'
temp_dir = r'L:\Python\temp1'
new_dir = r'L:\Python\temp2'
final_dir = r'L:\Python\Destination Unzipped files'

# Opt-in switch: when True, each archive in data_dir is removed once it and
# all of its inner archives have been processed without an exception.
# Defaults to False so the script never deletes anything unless asked to.
DELETE_SOURCE_AFTER_EXTRACT = False

big_list = os.listdir(data_dir)
# Running count of outer archives processed so far.  The original code reset
# this to len(big_list) before the loop and then incremented it again per
# archive, so the "cumulative total" log line was inflated by len(big_list).
archive_count = 0
# Sequence number used in the next pair of output file names.
# NOTE(review): the closing summary reports file_count - 1 as the number of
# pairs, which is only a true count if numbering started at 1 -- confirm
# whether this start value continues an earlier run.
file_count = 152865
basename1 = os.path.join(final_dir, 'GENERIC_ROUGHDRAFT')
basename2 = os.path.join(final_dir, 'XACTDOC')

my_time()
logging.info('Unzipping {} archives...'.format(len(big_list)))

for folder in big_list:
    prior_count = file_count
    archive_path = os.path.join(data_dir, folder)
    logging.info('Starting: {}'.format(folder))

    # Start every outer archive with an empty scratch directory; a missing
    # directory (e.g. on the very first pass) is not an error.
    try:
        shutil.rmtree(temp_dir)
    except FileNotFoundError:
        pass
    os.mkdir(temp_dir)

    # The archive handle is closed as soon as extraction finishes, so the
    # file is no longer held open if it is deleted further down.
    with zipfile.ZipFile(archive_path, mode='r') as a_zip:
        a_zip.extractall(path=temp_dir)
    archive_count += 1
    logging.info('Cumulative total of {} archive(s) unzipped'.format(archive_count))

    bigger_list = os.listdir(temp_dir)
    logging.info('Current archive contains {} subfolders'.format(len(bigger_list)))

    for sub_folder in bigger_list:
        # NOTE(review): new_dir is not cleared between inner archives, so if
        # an inner archive lacks one of the two XML files, a copy left over
        # from a previous inner archive would be picked up -- confirm that
        # every inner archive always contains both files.
        with zipfile.ZipFile(os.path.join(temp_dir, sub_folder), mode='r') as b_zip:
            b_zip.extractall(path=new_dir)
        file1 = "%s (%d).%s" % (basename1, file_count, 'xml')
        file2 = "%s (%d).%s" % (basename2, file_count, 'xml')
        shutil.copy(os.path.join(new_dir, 'GENERIC_ROUGHDRAFT.xml'), file1)
        shutil.copy(os.path.join(new_dir, 'XACTDOC.xml'), file2)
        file_count += 1

    logging.info('{} subfolders unzipped'.format(file_count - prior_count))

    # Remove the source only after everything above succeeded: any exception
    # raised earlier propagates before this point and leaves the archive in
    # place.
    if DELETE_SOURCE_AFTER_EXTRACT:
        os.remove(archive_path)

    my_time()

logging.info('Total of {0} files -- {1} pairs -- should be in {2}'.format(2*(file_count-1), file_count-1, final_dir))
time.sleep(1)
my_time()