It's not pretty and it could probably be more efficient, but this works:
import os, re
# change this to be wherever you keep all those log files
work_dir = '/home/ubuntu/workspace/bin/tmp'
# load the full path for all files in the work_dir (I'm not checking if file is a .log file)
logs = [os.path.join(work_dir, file) for file in os.listdir(work_dir) if os.path.isfile(os.path.join(work_dir, file))]
def process_list(in_list):
    date_patt = r'\d{4}-\d{2}-\d{2}[\s]+\d{2}:\d{2}:\d{2}'
    last_good_idx = 0
    for idx in range(len(in_list)):
        if re.search(date_patt, in_list[idx]):
            last_good_idx = idx
        else:
            # continuation line: append it to the last line that started with a timestamp
            in_list[last_good_idx] += f' {in_list[idx].strip()}'
    return in_list
def clean_list(in_list):
    date_patt = r'\d{4}-\d{2}-\d{2}[\s]+\d{2}:\d{2}:\d{2}'
    # iterate over a copy so removing from the original list is safe
    for elem in in_list[:]:
        if not re.search(date_patt, elem):
            in_list.remove(elem)
    return in_list
# write master log to working directory file called master.log
with open(os.path.join(work_dir, 'master.log'), 'w') as out:
    for file in logs:
        with open(file, 'r') as f:
            file_text = f.read()
        text_list = file_text.split('\n')
        text_list = process_list(text_list)
        text_list = clean_list(text_list)
        for line in text_list:
            out.write(line + '\n')
If you only want to use files ending in .log, add that check to the list comprehension that assigns the logs variable, for example:
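Something like this (same variables as above; the only change is the endswith check):

# keep only regular files whose names end in .log
logs = [
    os.path.join(work_dir, file)
    for file in os.listdir(work_dir)
    if os.path.isfile(os.path.join(work_dir, file)) and file.endswith('.log')
]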
process_list handles moving lines which don't match the date_patt regex onto the end of the string found at the last index where date_patt was matched.
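To make that concrete, here's a small run on made-up log lines (not from your files):

lines = [
    '2024-01-05 10:15:30 ERROR something broke',
    'Traceback (most recent call last):',
    '  File "app.py", line 3, in <module>',
    '2024-01-05 10:15:31 INFO recovered',
]
process_list(lines)
# lines[0] is now:
# '2024-01-05 10:15:30 ERROR something broke Traceback (most recent call last): File "app.py", line 3, in <module>'
# the two continuation lines are still in the list at their old positions until clean_list runs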
clean_list removes any element from the input list that doesn't match date_patt.
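Continuing the same made-up example:

clean_list(lines)
# lines is now:
# ['2024-01-05 10:15:30 ERROR something broke Traceback (most recent call last): File "app.py", line 3, in <module>',
#  '2024-01-05 10:15:31 INFO recovered']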