I am working on a task where I receive several excel files containing similar information. However, every excel sheet has its own naming convention, and I have to manually change the column names in excel. Additionally, the columns might get shuffled in the next iteration. So basically, there is no fixed format of the source files currently in place.
your text
consu_mapping = {
'CUSTOMERID': ['CUSTOMERID','CustomerID', 'Customer_ID','CUSTOMER ID', 'ID NO', 'OTHER ID NUMBER', 'CUSTOMER_NO', 'CUSTOMER_NUMBER', 'CUST_NO', 'CUSTOMER_KEY', 'SUBJECT IDENTIFICATION NUMBER', 'CUSTCODE', 'CUSTOMER_ID', 'ID', 'CUSTOMER ID (OR CUSTOMER NUMBER)', 'SUBJECT ID NO', 'SUBJECT__IDENTIFICATION__NUMBER', 'CUSTOMERS NUMBER', 'SHEET1$.CUSTOMERID', 'ID_NO', 'CONSUMER ID'],
'BRANCHCODE': ['Branch_Code','BRANCHCODE','Branch Code', 'Branch_Code', 'BRANCH ID', 'BRANCH IDENTIFICATION NUMBER'],
'SURNAME': ['Surname', 'Surname', 'SURNAME', 'FULL_NAME', 'MF_ASSOCIATION_NAME', 'DESCRIPTION', 'SUB_FULL_NAME', 'FULL NAME', 'SUBJECT FULL NAME', 'SURNAME FIRST NAME OTHER NAMES', 'FULL NAMES', 'ACCT_NAME', 'NAME', "CUSTOMER'S NAME", 'BUSINESS NAME', 'LAST NAME', 'SUBJECT FAMILY NAME/ LAST NAME', 'SUBJECT__FAMILY NAME/__LAST NAME', 'REGISTERED_NAME', 'CUSTOMER NAME', 'BUSINESS_NAME', 'SURNAME 11'],
'FIRSTNAME': ['First name', 'first_name','FIRST_NAME', 'SUBJECT FIRST NAME', 'SUBJECT FIRST__NAME'],
'MIDDLENAME': ['Middle name', 'MIDDLENAME', 'OTHERNAMES', 'MIDDLE_NAME', 'OTHER NAMES', 'OTHER_NAME',
'OTHER NAME', 'SUBJECT MIDDLE NAME', 'SUBJECT__MIDDLE NAME'],
'DATEOFBIRTH': ['Date of Birth', 'BIRTHDATE', 'BIRTH DATE', 'DATE_OF_BIRTH', 'BIRTH DAY', 'DATE OF BIRTH (DD-MM-YYYY)', 'BIRTHDATE (DD/MM/YYYY)', 'DOB', 'BIRTH DAY/MONTH/YEAR', 'BIRTHDAY', 'BIRTHDATE (YYYYMMDD)', 'DATE OF BIRTH (YYYYMMDD)', 'DATE OF BIRTH (YYYY/MM/DD)', 'BIRTHDATE DD/MM/YYYY'],
'NATIONALIDENTITYNUMBER': ['NATIONALIDENTITYNUMBER','NATIONAL IDENTITY NUMBER','NATIONAL ID','NATIONAL_ID_CODE','NATIONAL_ID_NUMBER','NATIONAL_ID','NATIONAL_IDENTITY_NUMBER','NAT_ID_CARD_NUM','NATIONAL ID CODE','NATIONAL ID NUMBER','SUBJECT IDENTIFICATION NUMBER','SUBJECT NATIONAL IDENTIFICATION CODE','NIN'],
'DRIVERSLICENSENO': ['Drivers License No'],
'BVNNUMBER': ['BVN No','BANK VERIFICATION NUMBER','CUSTOMERBVN''OTHER ID NUMBER','BVN','BANK
BVN','BANKVERIFICATIONNUMBER'],
'PASSPORTNUMBER' : ('PASSPORTNUMBER','PASSPORT NO','PASSPORT NUMBER','PASSPORT','PASSPORT_NO','PASSPORT ID','PSPRT_NUM','SUBJECT INTERNATIONAL PASSPORT'),
'PENCOMIDNUMBER': ['PENCOMIDNUMBER', 'PENCOM ID NUMBER', 'PENCOM ID', 'PENCOM NUMBER', 'PENSION COMMISSION NUMBER', 'PENSION NUMBER', 'SUBJECT PENCOM ID NUMBER', 'SUBJECT__PENCOM ID NUMBER'],
'OTHERID' : ['OTHERID','OTHER ID','OTHER ID NUMBER','OTHER_ID_NUMBER','OTHER_ID','INDIVIDUAL TAX ID NO','OTHERIDNUMBER'],
'GENDER': ['gender','Gender', 'SEX', 'SUBJECT GENDER', 'SUBJECT__GENDER'],
'NATIONALITY': ['Nationality','nationality','NATIONALITY','SUBJECT NATIONALITY-COUNTRY','SUBJECT NATIONALITY COUNTRY','SUBJECT NATIONALITY- COUNTRY','NATIONALIT','National Identity Number', 'Nationality', 'COUNTRY', 'CITIZENSHIP', 'SUBJECT NATIONALITY', 'SUBJECT__NATIONALITY'],
'MARITALSTATUS': ['Marital Status', 'MARITAL STATUS','MARITAL_STATUS','CUST_MARITAL_STATUS','MARITAL'],
'MOBILENUMBER': ['Mobile number', 'PHONE', 'PHONE NUMBER', 'MOBILE', 'MOBILE NUMBER', 'TELEPHONE', 'TELEPHONE NUMBER', 'CONTACT NUMBER', 'SUBJECT PHONE', 'SUBJECT__PHONE', 'TEL', 'TEL NO', 'SUBJECT TEL NO', 'SUBJECT__TEL NO'],
'PRIMARYADDRESSLINE1': ['Primary Address Line 1', 'ADDRESS_1','ADDRESS_LINE_1','ADDRESS1_LINE1','RESD_ADDR1','ADDRESS TYPE 1','ADDRESS1','ADDRESS','RESIDENTIALADDRESS','ADDRESS TYPE 1 LINE 1','PRIMARY_ADDRESS_LINE1','CUST_PERM_ADDR1','RESIDENTIAL ADDRESS LINE1','PRIMARY ADDRESS LINE1','POSTAL ADDRESS1','BUSINESS OFFICE ADDRESS LINE1','RESIDENTIAL_ADDRESS_LINE_1','ADDRESS TYPE__1 LINE 1','PRIMARY_ADDRESS_LINE_1','ADDRESS_TYPE1','BUSLINE_TYPE1','Business Office Address Line 1','ADDRESSTYPE1LINE1','Location Address Line1','ADDRESS', 'RESIDENTIAL ADDRESS', 'HOME ADDRESS', 'SUBJECT ADDRESS', 'SUBJECT__ADDRESS', 'ADDR', 'LOCATION'],
'PRIMARYADDRESSLINE2': ['Primary Address Line 2','PRIMARY ADDRESS LINE 2','ADDRESS LINE 2','RESIDENTIALADDRESS2','RESIDENTIAL ADDRESS2','RESIDENTIAL ADDRESS 2','ADDRESS_2','ADDRESS_LINE_2','ADDRESS1_LINE2','ADDRESS TYPE 1 LINE 2','PRIMARY_ADDRESS_LINE2','CUST_PERM_ADDR2','RESIDENTIAL ADDRESS LINE2','PRIMARY ADDRESS LINE2','BUSINESS OFFICE ADDRESS LINE2','RESIDENTIAL_ADDRESS_LINE_2','PRIMARY_ADDRESS_LINE_2','BUSLINE_TYPE2','ADDRESS2','ADDRESSTYPE1LINE2','Location Address Line2'],
'PRIMARYCITYLGA': ['Primary city/LGA','PRIMARY CITY/LGA','ADDRESS LINE 3','ADDRESS CITY','RESIDENTIALADDRESS3','RESIDENTIAL ADDRESS 3','ADDRESS_3','ADDRESS_LINE_3','ADDRESS1_CITY','ADDRESS TYPE 1 CITY','LGA','ADDRESS TYPE 1 LGA CITY','PRIMARY_CITY_LGA','ADDRESS TYPE 1 LGA/CITY','ADDRESS TYPE1CITY LGA','LOCAL_GOVERNMENT_OF_ORIGIN','CITY_ADDR','CITY'],
'PRIMARYSTATE': ['Primary State','PRIMARY STATE','ADDRESS STATE','RESIDENTIALADDRESS4','RESIDENTIAL ADDRESS 4','STATE','ADDRESS1_STATE','ADDRESS TYPE 1 STATE','PRIMARY_STATE','ADDRESS TYPE1STATE','STATE_OF_ORIGIN','ADDRESS TYPE 1 CITY/ LGA','STATE_ADDR'],
'PRIMARYCOUNTRY': ['Primary Country','PRIMARY COUNTRY','ADDRESS COUNTRY','COUNTRY','ADDRESS1_COUNTRY','ADDRESS TYPE 1 COUNTRY','PRIMARY_COUNTRY','CUST_PERM_CNTRY_CODE','CUST_COMU_CNTRY_CODE','ADDRESS TYPE1COUNTRY','COUNT_ADDR'],
'EMPLOYMENTSTATUS': ['Employment Status','EMPLOYMENT STATUS','POSTAL CDE OF RES ADDR','EMPLOYMENT_STATUS','EMP_STATUS'],
'OCCUPATION': ['Occupation','OCCUPATION','Occupation','NATURE_OF_BUSINESS','EMPLOYER_DETAIL','OCCUP'],
'BUSINESSCATEGORY': ['Business Category','BUSINESSCATEGORY','BUSINESS_CATEGORY','BUS_CATE','business category','Business Category','business_category'],'BUSINESSSECTOR': ['Business Sector'],
'BUSINESS SECTOR': ['BUSINESSSECTOR','BUSINESS_SECTOR','BUS_SECTOR','business sector','Business Sector','business_sector'],
'BORROWERTYPE': ['Borrower Type','BORROWERTYPE','BORROWER_TYPE','BOROW_TYPE','borrower_type','Borrower Type','borrower type'],
'TAXID': ['Tax ID','TAXID','INDIVIDUAL TAX ID NO'],
'PICTUREFILEPATH': ['Picture File Path','PICTUREFILEPATH','PIC_FILE'],
}
def rename_columns(consu, consu_mapping):
for column in consu.columns:
for mapped_column, alt_names in consu_mapping.items():
if column in alt_names or column.lower() in alt_names or column.upper() in alt_names:
consu.rename(columns={column: mapped_column}, inplace=True)
break
return consu
the outcome was ['CUSTOMERID', 'BRANCHCODE', 'SURNAME', 'FIRSTNAME', 'MIDDLENAME',
'DATEOFBIRTH', 'NATIONALIDENTITYNUMBER', 'drivers_license_no', 'bvn_no',
'PASSPORTNUMBER', 'GENDER', 'NATIONALITY', 'MARITALSTATUS',
'mobile_number', 'PRIMARYADDRESSLINE1', 'PRIMARYADDRESSLINE2',
'PRIMARYCITYLGA', 'PRIMARYSTATE', 'PRIMARYCOUNTRY', 'EMPLOYMENTSTATUS',
'OCCUPATION', 'BUSINESSCATEGORY', 'BUSINESS SECTOR', 'BORROWERTYPE',
'OTHERID', 'TAXID', 'Picture_File_Path', 'email_address',
'EMPLOYERNAME', 'EMPLOYERADDRESSLINE1', 'EMPLOYERADDRESSLINE2',
'EMPLOYERCITY', 'EMPLOYERSTATE', 'EMPLOYERCOUNTRY', 'TITLE',
'PLACEOFBIRTH', 'WORKTELEPHONE', 'HOMETELEPHONE',
'SECONDARYADDRESSLINE1', 'SECONDARYADDRESSLINE2',
'SECONDARYADDRESSCITYLGA', 'SECONDARYADDRESSSTATE',
'SECONDARYADDRESSCOUNTRY', 'SPOUSESSURNAME', 'SPOUSESFIRSTNAME',
'SPOUSESMIDDLENAME'] instead of the keys provided: it converted for some and not all, what do I do?