1

I am a beginner in Python and I have a log file that is 1 TB in size. I would like to know whether I can process the log file line by line every 10 seconds. I also want the scanning process to be as fast as possible. This is my code:

import re
import sys
from csv import writer
import datetime

# Log file to scan and the CSV file that receives one row per parsed line.
log_file = '/Users/kiya/Desktop/mysql/ipscan/ip3.txt'

output_file = '/Users/kiya/Desktop/output.csv'

# Cheap substring pre-filter: only lines containing this marker are parsed.
name_to_check = 'MBX_AUTHENTICATION_FAILED'

# Patterns are compiled once at import time instead of being looked up for
# every matching line of a very large file.

# Non-greedy so the capture stops at the first "]," after "userName=[" rather
# than running on to the last "]," on the line.
USERNAME_RE = re.compile(r'(?<=userName=\[)(.*?)(?=\],)')

# Example of the matched text: "20180608 195347627 *+0900"
TIMESTAMP_RE = re.compile(r'(?P<date>\d{8})\s+(?P<time>\d{9})\s\*\+(?P<zone>\d{4})')

# Longest alternatives first, and digit guards on both sides, so an address
# such as 192.168.1.100 is captured whole instead of being cut to 192.168.1.1
# and a match cannot start in the middle of a longer number.
_OCTET = r'(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])'
IP_RE = re.compile(r'(?<!\d)(?:' + _OCTET + r'\.){3}' + _OCTET + r'(?!\d)')


def parse_line(line):
    """Extract the fields of interest from one log line.

    Args:
        line: A single line of the log file.

    Returns:
        A ``(username, date, time, ip)`` tuple of strings, with the date
        formatted as ``YYYY-MM-DD`` and the time as ``HH:MM:SS``, or ``None``
        when the line lacks a username, a timestamp or an IPv4 address, or
        when the timestamp digits do not form a valid date and time.
    """
    user = USERNAME_RE.search(line)
    stamp = TIMESTAMP_RE.search(line)
    address = IP_RE.search(line)
    if user is None or stamp is None or address is None:
        return None

    try:
        # The 9-digit time is HHMMSS followed by fractional-second digits,
        # which %f consumes; date and time are validated in a single call.
        when = datetime.datetime.strptime(
            stamp.group('date') + ' ' + stamp.group('time'),
            "%Y%m%d %H%M%S%f",
        )
    except ValueError:
        return None

    return (
        user.group(),
        when.strftime("%Y-%m-%d"),
        when.strftime("%H:%M:%S"),
        address.group(),
    )


def scan_log(log_path=log_file, out_path=output_file, marker=name_to_check):
    """Append one CSV row to *out_path* for every parsable *marker* line.

    The log is streamed line by line, so memory use does not grow with the
    size of the file. The output file is opened once (and created if it does
    not exist) and written through the normal buffered layer rather than
    being reopened unbuffered for every row.

    Args:
        log_path: Path of the UTF-8 log file to read.
        out_path: Path of the CSV file to append to.
        marker: Substring a line must contain to be considered.

    Returns:
        A ``(written, skipped)`` tuple: the number of rows appended and the
        number of marker lines that could not be parsed.
    """
    written = 0
    skipped = 0
    with open(log_path, encoding="utf-8") as infile, \
            open(out_path, "a", encoding="utf-8", newline="") as outfile:
        # csv.writer quotes fields containing commas or quotes; the explicit
        # terminator keeps the plain "\n" line ending of the output format.
        rows = writer(outfile, lineterminator="\n")
        for line in infile:
            if marker not in line:
                continue
            record = parse_line(line)
            if record is None:
                # One malformed line must not abort a scan of the whole file.
                skipped += 1
                continue
            rows.writerow(record)
            written += 1

    if skipped:
        print("skipped {} unparseable line(s)".format(skipped), file=sys.stderr)
    return written, skipped


if __name__ == "__main__":
    scan_log()
warezers
  • 174
  • 10
  • You may find the answers to https://stackoverflow.com/questions/1703640/how-to-implement-a-pythonic-equivalent-of-tail-f and https://stackoverflow.com/questions/12523044/how-can-i-tail-a-log-file-in-python helpful. –  Jun 08 '18 at 09:15

0 Answers0