I have an Excel file that contains invoice numbers in column A, and I have a PDF file. If an invoice number from the Excel file also appears in the PDF, I want to write that number into column B of the same row.
This is my code:
import openpyxl
import pdfplumber
# Script: copy each invoice number from column A to column B of the same row
# when that number also appears in the text of the PDF, then save the result
# as 'datafile.xlsx'.

# Characters trimmed from the edges of PDF words so that a token such as
# "12345," or "#12345" still compares equal to the invoice number "12345".
_TOKEN_PUNCTUATION = ".,;:#()[]"


def _normalize_invoice(value):
    """Return *value* as a stripped string suitable for comparison.

    Spreadsheet cells may hold numbers (int or float) while text extracted
    from a PDF is always ``str``; comparing them directly never matches.
    ``None`` (an empty cell) becomes the empty string.
    """
    if value is None:
        return ""
    # A whole-number float such as 12345.0 should compare equal to "12345".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


# Load Excel file
excel_file_path = 'path/to/your/excel/file.xlsx'
workbook = openpyxl.load_workbook(excel_file_path)
sheet = workbook.active

# Read invoice numbers from column A, skipping the header row.
# With values_only=True each row is a tuple of plain values (not Cell
# objects), so the value is row[0] itself -- there is no `.value` attribute.
invoice_numbers_excel = [
    row[0]
    for row in sheet.iter_rows(min_row=2, min_col=1, max_col=1, values_only=True)
]

# Load PDF file and collect every candidate string from every page.
pdf_file_path = 'path/to/your/pdf/file.pdf'
invoice_numbers_pdf = set()
with pdfplumber.open(pdf_file_path) as pdf:
    for page in pdf.pages:
        # Guard against a page with no extractable text; depending on the
        # pdfplumber version this may come back as None rather than "".
        page_text = page.extract_text() or ""
        for line in page_text.splitlines():
            line = line.strip()
            if not line:
                continue
            # Keep the whole line (covers numbers that contain spaces) ...
            invoice_numbers_pdf.add(line)
            # ... and each individual word, so "Invoice No: 12345" matches.
            for token in line.split():
                token = token.strip(_TOKEN_PUNCTUATION)
                if token:
                    invoice_numbers_pdf.add(token)

# Compare and write matching numbers into column B of the same row.
# enumerate starts at 2 because the list was read starting from sheet row 2.
for row_idx, invoice_num_excel in enumerate(invoice_numbers_excel, start=2):
    invoice_key = _normalize_invoice(invoice_num_excel)
    # Skip empty cells; otherwise look the normalized number up in the set.
    if invoice_key and invoice_key in invoice_numbers_pdf:
        # Write the original cell value so its type (number/text) is kept.
        sheet.cell(row=row_idx, column=2, value=invoice_num_excel)

# Save the modified Excel file
workbook.save('datafile.xlsx')