I am a Python beginner. How do I extract the IP address, user name, date, and time from a log file and save them in a CSV file? Because the log is a very large file (1 TB), my script takes over 30 hours. Is it slow because I use regular expressions? And how could this analysis be done with pandas?
I wrote some Python code that extracts only the IP addresses (it is very slow):
import re
from collections import Counter
import csv
import sys
# Event names whose presence in the log makes read_log() report IP addresses.
names_to_check = ('ImapConnMade', 'UserDataException', 'AcctBadPswd')


def read_log(filename):
    """Return the IPv4-looking strings found in a log file.

    The addresses are returned only when at least one entry of
    ``names_to_check`` occurs somewhere in the file; otherwise the result
    is an empty list.

    The file is streamed line by line instead of being loaded with a
    single ``read()``, so memory use no longer grows with the size of the
    file text itself (only with the number of matches collected).

    Args:
        filename: Path of the UTF-8 encoded log file.

    Returns:
        A list of matched address strings in file order, duplicates kept.
    """
    # Compile once; the pattern is applied to every line of the file.
    ip_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    ip_list = []
    name_seen = False
    with open(filename, encoding='utf-8') as f:
        for line in f:
            # Neither the names nor the pattern can span a newline, so
            # checking per line gives the same answer as checking the
            # whole file content at once.
            if not name_seen and any(name in line for name in names_to_check):
                name_seen = True
            ip_list.extend(ip_pattern.findall(line))
    # NOTE(review): this keeps the original whole-file semantics (addresses
    # from *all* lines once any watched name appears). If only lines that
    # contain a watched name should contribute, filter per line instead.
    # Returning [] here replaces the UnboundLocalError the original raised
    # when no watched name was present.
    return ip_list if name_seen else []
def counter(ip_list):
    """Tally how many times each entry occurs in ``ip_list``.

    Args:
        ip_list: Iterable of hashable items (here, address strings).

    Returns:
        A ``collections.Counter`` mapping each distinct item to its count.
    """
    return Counter(ip_list)
def write_csv(count, path='/Users/kiya/Desktop/mysql/ipscan/kk.csv'):
    """Write the distinct keys of ``count`` to a one-column CSV file.

    The file gets an ``ip`` header row followed by one row per key, in the
    mapping's iteration order. The counts themselves are not written.

    Args:
        count: Mapping whose keys are the addresses to write (for example
            the ``Counter`` returned by ``counter``).
        path: Destination file; defaults to the location the script has
            always written to, so existing one-argument calls still work.
    """
    # newline='' is required by the csv module: the writer emits its own
    # line terminators, and without it Windows would produce '\r\r\n'.
    with open(path, 'w', newline='') as out:
        header = ['ip']
        writer = csv.DictWriter(out, fieldnames=header)
        writer.writeheader()
        # Only the keys are needed, so iterate the mapping directly.
        for ip in count:
            writer.writerow({"ip": ip})
if __name__ == '__main__':
    # Pipeline: pull the addresses out of the log, tally them, then write
    # the distinct addresses to the CSV file.
    log_file = '/Users/kiya/Desktop/mysql/ipscan/ip2.txt'
    found_ips = read_log(log_file)
    ip_counts = counter(found_ips)
    write_csv(ip_counts)
My sample log file:
20160531 161947365+0900 elim1td001p imapserv 58124 58719 139941482481408 Note;UserDataException(485/968) MBX_AUTHENTICATION_FAILED:{protocolType=[imap], userName=MzQyNQo, password=[MTY2ODgyMDAwOAo]}:AuthenticateAndGetMailboxService\3aPOST:Authenticating Failed.::user=MzQyNQo:cmd=1 LOGIN MzQyNQo MTY2ODgyMDAwOAo:fromhost=4.165.114.31:sid=22e63f2a-0ffa-77bf-352e-001720d50212