I have an Excel sheet containing metadata with 3 fields (path, folder_structure, filename).
path: the path of the source file in the S3 source bucket. folder_structure: the new folder structure that needs to be created in the target bucket. filename: the name the file needs to be renamed to after it is copied to the target bucket.
I have the code below, which works on Windows: it reads from a source folder, creates the target folder, and copies the data into it. I need to modify it so that it reads from one S3 bucket and loads the files into another S3 bucket.
code:
import os
import shutil
from pathlib import Path

import pandas as pd

# Raw string: the backslashes in a Windows path must not be read as escapes.
METADATA_FILE = r'c:\data\sample_requirement.xlsx'
ROOT_DIR = 'source'


def copy_and_rename(data, root_dir=ROOT_DIR):
    """Copy every file listed in *data* into its target folder under a new name.

    Args:
        data: Table-like object (e.g. the DataFrame read from the metadata
            sheet) whose ``'path'``, ``'folder_structure'`` and ``'filename'``
            columns can be iterated in lockstep.
        root_dir: Directory under which the target folder structure is created.

    For each record the ``'|'``-separated ``folder_structure`` is turned into
    nested directories below *root_dir* (created when missing), and the file at
    ``path`` is copied there as ``filename``. Records whose target file already
    exists are reported and skipped.
    """
    records = zip(data['folder_structure'], data['path'], data['filename'])
    for folder_structure, path, filename in records:
        # Joining the parts with Path inserts the separator between every
        # component, whether or not folder_structure ends with '|'.
        directory = Path(root_dir, *folder_structure.split('|'))
        directory.mkdir(parents=True, exist_ok=True)

        target = directory / filename
        if target.is_file():
            print('File Name already Exists')
            continue

        # Copy straight to the final name instead of copy-then-rename, so a
        # failure cannot leave a stray file under its original name.
        shutil.copy(path, target)


if __name__ == '__main__':
    data = pd.read_excel(METADATA_FILE, engine='openpyxl')
    copy_and_rename(data, ROOT_DIR)