I get a list from reading the columns of an excel file, I need to validate if a column is in the list, if not then create it empty, but the conditional is not working, I know for a fact that 'City' is missing so I'm expecting it to be created and is not.
files = os.listdir(input_path)
files_xlsx = [f for f in files if f[-5:] == ".xlsx"]
df = pd.DataFrame()
for f in files_xlsx:
wb = load_workbook(input_path + "\\" + f, read_only=True)
if 'New Leads' in wb.sheetnames:
df = pd.read_excel(input_path + "\\" + f, sheet_name="New Leads")
colre = [col for col in df.columns if "Email" in col]
for eo in colre:
df.rename(columns={eo: eo.replace(' ','').replace('*','').replace('**','') for eo in colre}, inplace=True)
dtcol = [col for col in df.columns if "FIRST NAME" in col.upper()]
for ex in dtcol:
df.rename(columns={ex: "First Name"}, inplace=True)
dtcol = [col for col in df.columns if "LAST NAME" in col.upper()]
for ex in dtcol:
df.rename(columns={ex: "Last Name"}, inplace=True)
NOT WORKING PIECE OF CODE
dtcol = [col for col in df.columns if "CITY" in col.upper()]
for ex in dtcol:
if len(dtcol)==0:
df['City'] = NaN
else:
df.rename(columns={ex: "City"}, inplace=True)
END OF NOT WORKING PIECE OF CODE
dtcol = [col for col in df.columns if "COMPANY NAME" in col.upper()]
for ex in dtcol:
df.rename(columns={ex: "*** Company"}, inplace=True)
if "SOURCE" in cols:
df['Email'].replace('', np.nan, inplace=True)
df.dropna(subset=['Email'], inplace=True)
if df.dtypes['SOURCE'] != 'float':
df.dropna(how = 'all')
df['File'] = f
if df.SOURCE.map(lambda x: len(x)).max() <= 10:
df = pd.merge(df, df_ac[["SOURCE", "Lead Source", "Lead Category"]], on="SOURCE", how='left')
del df['SOURCE']
df.rename(columns={"Lead Source": "SOURCE"}, inplace=True)
else:
df.rename(columns={"SOURCE": "Lead Source"}, inplace=True)
df = pd.merge(df, df_ac[["Lead Source", "Lead Category"]], on="Lead Source", how='left')
df.rename(columns={"Lead Source": "SOURCE"}, inplace=True)
df_xlsx = pd.concat([df, df_xlsx], ignore_index=True)
else:
df_ns = df_ns.append([f], ignore_index=True)
else:
df_ns = df_ns.append([f], ignore_index=True)
else:
df_ns = df_ns.append([f], ignore_index=True)