0

I have been unable to resolve this after looking at a number of similar answers.

The CSV file I produce contains only the last row of the DataFrame.

I need the whole DataFrame to be written to both the CSV and the Parquet file.

# For every record: wrap r[1] in PEM BEGIN/END markers, parse it as an X.509
# certificate, derive a flag from its certificate-policy OID, then write the
# result to a CSV file and a Parquet file. r[0] is used as the row id.
for r in records:
    content = '-----BEGIN CERTIFICATE-----' + '\n' + \
            r[1] + '\n'+'-----END CERTIFICATE-----'

    try:
        cert = x509.load_pem_x509_certificate(str.encode(content))
        cert_policy_value = cert.extensions.get_extension_for_oid(
        ExtensionOID.CERTIFICATE_POLICIES).value

        # policy_check is overwritten on each pass, so only the last policy
        # identifier of the certificate is kept for the check below.
        for ext in cert_policy_value:
            policy_check = ext.policy_identifier.dotted_string
            # logging.info(ext.policy_identifier.dotted_string)

        # Check whether the cert policy OID is Qualified or Non-QF.
        # NOTE(review): membership in qualified_qv_cert_oid yields 'Non-QF';
        # confirm this mapping is not inverted.
        if policy_check in qualified_qv_cert_oid:
            flag = 'Non-QF'
            logging.info('NON-QLFY')
        else:
            flag = 'QLFY'

    # BaseException also covers KeyboardInterrupt and SystemExit. When this
    # handler runs, `flag` is not reassigned, so the code below uses whatever
    # value an earlier iteration left behind (or fails if there is none).
    except BaseException as e:
        logging.error(f'Error found for cert: {e}')
        pass

    # Preparing the DataFrame to write to the CSV and Parquet files.
    # A new one-row DataFrame is created on every iteration and both files
    # are rewritten from it, so each file ends up holding only the row of
    # the final record.
    df = pd.DataFrame([{'id': r[0], 'flag':flag}])
    df.insert(2, 'timestamp', datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    df.to_csv('qv_output.csv', index=False, encoding='utf-8')
    df.to_parquet(path='qv_parsing.parquet', engine='auto', compression='snappy', index=False, partition_cols=None, storage_options=None)

1 Answer

0

After reading Michael Butscher's answer I tried to append my lines to the Dataframe.

I came up with an approach similar to the one in the answer to "Create a pandas DataFrame by appending one row at a time".

My code:

def append_row(df, row):
    """Return a new DataFrame consisting of ``df`` followed by ``row``.

    ``row`` is a Series; its index supplies the column labels of the
    one-row frame that is concatenated below ``df``. The input frame is
    not modified, and the result is renumbered from zero.
    """
    single_row_frame = pd.DataFrame([row], columns=row.index)
    stacked = pd.concat([df, single_row_frame])
    # Drop the inherited labels so the result gets a fresh 0..n-1 index.
    return stacked.reset_index(drop=True)
def parse():
    """Classify every certificate in ``records`` and write the results once.

    For each record, ``r[1]`` is wrapped in PEM BEGIN/END markers and parsed
    as an X.509 certificate; ``r[0]`` is used as the row id. The flag is
    ``'Non-QF'`` when the certificate's (last) policy OID is contained in
    ``qualified_qv_cert_oid`` and ``'QLFY'`` otherwise. A record whose
    certificate cannot be parsed or has no certificate-policies extension is
    logged and recorded with a flag of ``None`` instead of inheriting the
    previous record's flag.

    All rows are collected first and the DataFrame is built, written to
    ``qv_output.csv`` and ``qv_parsing.parquet``, and logged exactly once
    after the loop, rather than rewriting both files for every record.
    """
    rows = []

    for r in records:
        content = ('-----BEGIN CERTIFICATE-----' + '\n'
                   + r[1] + '\n' + '-----END CERTIFICATE-----')

        # Reset per record so a failure below can never reuse the values
        # computed for an earlier certificate.
        flag = None
        policy_check = None

        try:
            cert = x509.load_pem_x509_certificate(str.encode(content))
            cert_policy_value = cert.extensions.get_extension_for_oid(
                ExtensionOID.CERTIFICATE_POLICIES).value

            # Only the last policy identifier is kept, as before.
            # NOTE(review): if a match on *any* policy should count, replace
            # this loop with an any(...) check.
            for ext in cert_policy_value:
                policy_check = ext.policy_identifier.dotted_string

            # Check whether the cert policy OID is Qualified or Non-QF.
            # NOTE(review): membership in qualified_qv_cert_oid yields
            # 'Non-QF'; confirm this mapping is not inverted.
            if policy_check in qualified_qv_cert_oid:
                flag = 'Non-QF'
                logging.info('NON-QLFY')
            else:
                flag = 'QLFY'
        except Exception as e:
            # Exception rather than BaseException, so that Ctrl-C and
            # SystemExit still stop the run; per-certificate failures stay
            # best-effort.
            logging.error('Error found for cert: %s', e)

        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        rows.append({'id': r[0], 'flag': flag, 'timestamp': timestamp})

    # Build the frame once; explicit columns keep the header stable even
    # when there are no records.
    df = pd.DataFrame(rows, columns=['id', 'flag', 'timestamp'])
    df.to_csv('qv_output.csv', index=False, encoding='utf-8')
    df.to_parquet(path='qv_parsing.parquet', engine='auto', compression='snappy', index=False, partition_cols=None, storage_options=None)
    logging.info(df)