My code is performing file creation using dataframe rows being put through a function in a loop.
When every row has been processed the loop returns None
which prevents my entire template from being fully injected with data.
How to stop that loop once all data has been processed to avoid getting None
as the last result?
def template_input_col(df,i):
col = df['ColumnName'].iat[i]
dt_type = df['SSISType_src'].iat[i]
dt_lngh = df['DataTypeLength_src'].iat[i]
dt_prc = df['DataTypePrecision_src'].iat[i]
dt_scl = df['DataTypeScale_src'].iat[i]
input_columns = f'''\
<inputColumn
refId="Package\DFT\DST.Inputs[DST Input].Columns[{col}]"
cachedDataType="{dt_type}"
cachedName="{col}"
cachedLength="{dt_lngh}"
cachedPrecision="{dt_prc}"
cachedScale="{dt_scl}"
externalMetadataColumnId="Package\DFT\DST.Inputs[DST Input].ExternalColumns[{col}]"
lineageId="Package\DFT\SRC.Outputs[SRC Output].Columns[{col}]" />'''
if dt_type in {'str','wstr','bytes'}:
input_columns = re.sub(r'\s*cachedPrecision=".*"', '', input_columns)
input_columns = re.sub(r'\s*cachedScale=".*"', '', input_columns)
elif dt_type in {'numeric'}:
input_columns = re.sub(r'\s*cachedLength=".*"', '', input_columns)
elif dt_type in {'decimal' , 'dbtime2' , 'dbTimeStamp2' , 'dbTimeStampoffset'}:
input_columns = re.sub(r'\s*cachedLength=".*"', '', input_columns)
input_columns = re.sub(r'\s*cachedPrecision=".*"', '', input_columns)
else:
#input_columns = input_columns
input_columns = re.sub(r'\s*cachedLength=".*"', '', input_columns)
input_columns = re.sub(r'\s*cachedPrecision=".*"', '', input_columns)
input_columns = re.sub(r'\s*cachedScale=".*"', '', input_columns)
return input_columns
def output_input_col():
for idx, row in df.iterrows():
if not pd.isna(row['DataTypeName_src']) and not pd.isna(row['DataTypeName_dst']):
return template_input_col(df,idx)
print(output_input_col())
Once data is processed it has to injected with below:
line = line.replace('<DST_Input_Columns_Placeholder>', output_input_col())
Expected result must retain the format of:
<inputColumn
refId="Package\DFT\DST.Inputs[DST Input].Columns[created_at]"
cachedDataType="dbTimeStamp"
cachedName="created_at"
externalMetadataColumnId="Package\DFT\DST.Inputs[DST Input].ExternalColumns[created_at]"
lineageId="Package\DFT\SRC.Outputs[SRC Output].Columns[created_at]" />
<inputColumn
refId="Package\DFT\DST.Inputs[DST Input].Columns[updated_at]"
cachedDataType="dbTimeStamp"
cachedName="updated_at"
externalMetadataColumnId="Package\DFT\DST.Inputs[DST Input].ExternalColumns[updated_at]"
lineageId="Package\DFT\SRC.Outputs[SRC Output].Columns[updated_at]" />
<inputColumn
refId="Package\DFT\DST.Inputs[DST Input].Columns[deleted_at]"
cachedDataType="dbTimeStamp"
cachedName="deleted_at"
externalMetadataColumnId="Package\DFT\DST.Inputs[DST Input].ExternalColumns[deleted_at]"
lineageId="Package\DFT\SRC.Outputs[SRC Output].Columns[deleted_at]" />