Is there an easy way to parse machine-generated log files? Here is one line from the file that I am trying to read and save to SQL.
import re
# One raw record copied from the log. The line breaks inside the literal are
# part of the string, so the record spans three lines of text.
mytext = '''2018-07-22 08:12:02 Receive DR [SMAC:PI_ID_VOP3_N_K_T] [SVC:vikan2] [ACT:vitp3] [BINF:] [FID:e6f97a4b-c4bc-4aa1-876b-3a5ce40b4cfd] [META:?spp?dl_err=%03%00%
00&] [from:ALERTS] [to:91985] [flags:-1:-1:-1:-1:1] [msg:132:id:e6f97a4b-c4bc-4aa1-876b-3a5ce40b4cfd sub:001 dlvrd:001 submit date:180722070002 done date:1807220
81157 stat:DEL err:000 text:] [udh:0:]
'''

# `.*` is greedy and `.` does not match a newline, so the group runs to the
# LAST `]` on the first line of the record rather than the first `]` after
# the SVC value.
values = re.compile(r'SVC:(.*)]').search(mytext)
print(values.groups())
Instead of returning only 'vikan2', it returns a long string...
('vikan2] [ACT:vitp3] [BINF:] [FID:e6f97a4b-c4bc-4aa1-876b-3a5ce40b4cfd',)
I am trying to convert the string to a dictionary that will look like this...
{'log_date':'2018-07-22 08:12:02', 'log_entry':'Receive DR',
'SMAC': 'PI_ID_VOP3_N_K_T',
'SVC': 'vikan2',
'ACT': 'vitp3',
'BINF': '',
'FID': 'e6f97a4b-c4bc-4aa1-876b-3a5ce40b4cfd',
'META': '?spp?dl_err=%03%00%\n00&',
'from': 'ALERTS',
'to': '91985',
'flags': '-1:-1:-1:-1:1',
'msg': '132:id:e6f97a4b-c4bc-4aa1-876b-3a5ce40b4cfd',
'sub': '001',
'dlvrd': '001',
'submit date': '180722070002',
'done date': '1807220\n81157',
'stat': 'DEL',
'err': '000',
'text': '',
'udh': '0:'}
Update:
1) I am not able to read the "meta" field (i.e. it did not return the expected string '?spp?dl_err=%03%00%\n00&').
2) I am able to build the dictionary using this code, but I am sure there must be a better way to do this.
# Build `mydict` from the tagged fields of the log record held in `mytext`.
#
# Every field is written as `tag:value`. Bracketed fields end at the closing
# `]`; the sub-fields packed inside the [msg:...] group are single tokens
# that end at the next space (or at the group's closing `]`).
#
# Negated character classes are used instead of `(.*?)]` / `(.*?)\s`:
# `.` does not match a newline and `\s` treats a newline as the end of the
# value, yet the record wraps in the middle of the META and "done date"
# values. With a negated class a wrapped value is captured whole, embedded
# newline included.
_TO_BRACKET = r'\]'   # characters that end a bracketed value
_TO_SPACE = r' \]'    # characters that end a single-token value

# (key stored in mydict, tag as written in the log, terminating characters)
_FIELD_SPECS = (
    ('SVC', 'SVC', _TO_BRACKET),
    # The tag in the record is SMAC; searching for 'SMC:' never matched and
    # the resulting None made `.groups()` raise AttributeError.
    ('SMAC', 'SMAC', _TO_BRACKET),
    ('ACT', 'ACT', _TO_BRACKET),
    ('BINF', 'BINF', _TO_BRACKET),
    ('FID', 'FID', _TO_BRACKET),
    ('META', 'META', _TO_BRACKET),
    ('from', 'from', _TO_BRACKET),
    ('to', 'to', _TO_BRACKET),
    ('flags', 'flags', _TO_BRACKET),
    ('msg', 'msg', _TO_SPACE),
    ('sub', 'sub', _TO_SPACE),
    ('dlvrd', 'dlvrd', _TO_SPACE),
    ('submit_date', 'submit date', _TO_SPACE),
    ('done_date', 'done date', _TO_SPACE),
    ('stat', 'stat', _TO_SPACE),
    ('err', 'err', _TO_SPACE),
    ('text', 'text', _TO_BRACKET),
    ('udh', 'udh', _TO_BRACKET),
)


def _extract_field(tag, stop_chars, text):
    """Return the value that follows ``tag:`` in ``text``, or None if absent.

    ``stop_chars`` is the inside of a regex character class listing the
    characters that terminate the value; one of them must follow the value
    for the field to count as present. The ``(?<!\\w)`` guard keeps a short
    tag such as ``to`` from matching at the tail of a longer word.
    """
    pattern = (
        r'(?<!\w)' + re.escape(tag)
        + ':([^' + stop_chars + ']*)[' + stop_chars + ']'
    )
    found = re.search(pattern, text)
    return found.group(1) if found else None


# A tag that is missing from the record is stored as None, so every key is
# always present and a missing field no longer aborts the script.
mydict = {
    key: _extract_field(tag, stop_chars, mytext)
    for key, tag, stop_chars in _FIELD_SPECS
}