I am using this code, which runs BeautifulSoup on multiple .html files. The code saves the extracted text into .txt files. I want to save each extracted record as a separate row in a DataFrame.
I want to save the results into a DataFrame with a single column named "file". How can I achieve this?
import glob
import os.path
from bs4 import BeautifulSoup
# Folder that is scanned for input ``*.html`` files.
dir_path = r"C:\My_folder\tmp"
# NOTE(review): results_dir is defined but never used below; each .txt file is
# written next to its source .html file. Confirm which location is intended.
results_dir = r"C:\My_folder\tmp\working"

# For every HTML file in dir_path, write the text of each
# <font color="#FF0000"> element to a sibling .txt file (one element per
# line), echoing each piece of text to stdout as well.
for file_name in glob.glob(os.path.join(dir_path, "*.html")):
    # Read raw bytes so BeautifulSoup detects the document's encoding itself
    # instead of depending on the platform's default text encoding.
    with open(file_name, "rb") as html_file:
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed, so results can differ between machines.
        soup = BeautifulSoup(html_file, "html.parser")

    # Same path as the input, with the extension swapped for .txt.
    results_file = os.path.splitext(file_name)[0] + ".txt"
    # Explicit UTF-8 so text outside the locale code page cannot raise
    # UnicodeEncodeError while writing.
    with open(results_file, "w", encoding="utf-8") as outfile:
        for tag in soup.select('font[color="#FF0000"]'):
            print(tag.text)
            outfile.write(tag.text + "\n")