# Collect the extracted text of every PDF as a list of lines, one list per file.
files_data = []
for file in files:
    with pdfplumber.open(file) as pdf:
        lines = []
        for page in pdf.pages:
            # Some pdfplumber versions return None (not "") for a page without
            # extractable text; fall back to "" so .split() cannot fail.
            text = page.extract_text() or ""
            lines.extend(text.split('\n'))
    # One entry per input file, in the same order as `files`.
    files_data.append(lines)
# Rebuild the full text of the first two files (lines re-joined with newlines)
# and keep both texts together in list1.
z, z1 = ('\n'.join(files_data[idx]) for idx in (0, 1))
list1 = [z, z1]
print(list1)
# --- Patterns used to pull individual fields out of the extracted text ---

# Two letters, nine digits, one letter, two digits (e.g. "NL123456789B01").
BTW_re = re.compile(r'([A-Za-z]{2}\d{9}[A-Za-z]\d{2})')
# The literal label "KVK-nummer: " followed by one or more digits.
KVK_re = re.compile(r'(KVK-nummer: \d+)')
# Two uppercase letters, two digits, any single character, four uppercase
# letters, then the rest of the line (group 2).
# NOTE(review): the "." is unescaped and matches any character — confirm that
# this is meant to be a separator such as a space.
IBAN_re = re.compile(r'([A-Z]{2}\d{2}.[A-Z]{4}(.*))')
# Six uppercase letters, one digit, one uppercase letter.
BIC_re = re.compile(r'([A-Z]{6}\d[A-Z])')
# One whitespace character plus the following non-whitespace run (group 2)
# directly after the label "Factuur:".
Factuur_re = re.compile(r'((?<=Factuur:)\s([\S]+))')
# A date written as dd/dd/dddd (digits only, no capture group).
Factuurdatum_re = re.compile(r'\d{2}/\d{2}/\d{4}')
# One whitespace character plus the rest of the line after "Vervaldatum:".
Vervaldatum_re = re.compile(r'((?<=Vervaldatum:)\s.+)')
# The literal label "Betreft: " followed by the rest of the line (group 2).
Betreft_re = re.compile(r'(Betreft: (.*))')
# A whole line; with re.M, search() returns the first line of the text.
# (An earlier, immediately overwritten definition without re.M was removed.)
partyname_re = re.compile(r'^.*$', re.M)
# Anchored at the start of the text: consumes the first two lines; group 1
# holds the last repetition, i.e. the second line.
Adressline_re = re.compile(r'^(?:(.*)\r?\n){2}')
# Four digits followed by two uppercase letters (e.g. "1234AB").
postalcode_re = re.compile(r'(\d{4}[A-Z]{2})')
# Right after a postal code: one arbitrary character, a capital letter, then
# further ASCII letters. [A-Za-z] replaces the original [A-z], which also
# matched the punctuation between "Z" and "a" ([ \ ] ^ _ `).
City_re = re.compile(r'((?<=\d{4}[A-Z]{2})[\s\S][A-Z]{1}[A-Za-z]*)')
# Print the text matched by partyname_re for each collected document.
for item in list1:
    partyname_match = partyname_re.search(item)
    print(partyname_match.group(0))
For instance, for the print statement above, there would be two values, each stored in a separate list:
"Rompslomp.nl B.V." and "Rompslomp.nl B.V." (see the attached picture).
I don't know how to approach this. Can someone help me?