I'm trying to write an output as a csv file and getting this error:
write_file
df_data.to_csv('output.txt', sep='|')
AttributeError: 'list' object has no attribute 'to_csv'
When I change my output type to the others (excel or html), I get the same error as well.
Here's my code being used:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
def main():
certs_url = "https://csrc.nist.gov/projects/cryptographic-module-validation-program/validated-modules/search/all"
# Data refresh from the NIST site
data = get_html(certs_url)
cmvp_data = get_cmvp_data(data)
write_file('txt', cmvp_data)
def get_html(url):
# make request to URL and convert to BS obj
req = requests.get(url)
soup = BeautifulSoup(req.content, 'html.parser')
return soup
def get_cmvp_data(cmvp_content):
detail_urls_lst = []
sunset_date_lst = []
# --- Build CMVP dataframe ---
search_tbl = cmvp_content.find_all('table', id='searchResultsTable')
# convert HTML table to df obj
cert_table = pd.read_html(str(search_tbl))
df = cert_table[0]
#print(cert_table)
# column headers - sub spaces w/'_'
df.columns = [column.replace(" ", "_") for column in df.columns]
#print(df.columns)
#print(df['Module_Name'][0])
# --- Filter tech ---
df_filtered = df.loc[(df['Vendor_Name'].str.contains('Splunk', case=False, na=False)) |
df['Vendor_Name'].str.contains('Trend Micro', case=False, na=False) |
df['Vendor_Name'].str.contains('Yubico', case=False, na=False) |
df['Vendor_Name'].str.contains('Red Hat', case=False, na=False) |
df['Vendor_Name'].str.contains('Palo Alto', case=False, na=False) |
df['Vendor_Name'].str.contains('Microsoft', case=False, na=False) |
df['Vendor_Name'].str.contains('Cisco', case=False, na=False)]
#print(df_filtered['Module_Name'])
for i in df['Certificate_Number']:
print(i)
return detail_urls_lst
def write_file(output_type, df_data,):
# -- Write file --
if output_type == 'txt':
# df to txt
df_data.to_csv('output.txt', sep='|')
elif output_type == 'excel':
# write to excel
df_data.to_excel('output.xlsx')
else:
# write df to html file
result = df_data.to_html()
func = open('df.html', 'w')
func.write(result)
func.close()
print('Export complete')
return
if __name__ == "__main__":
main()
I tried looking at posts for similar errors and wasn't able to figure out a solution to fix this error. This is my first time using pandas and just can't figure out what's causing this.