0

I have been trying to extract emails from a pcap file and add them to a list. I have tried every way I can think off but can't seem to output it in any other way than what looks like a loop.


def email_list(info):
    #print('[+] email addresses found: ')
    list = []
    emaillist = re.findall(r"[a-zA-Z0-9.]+@[a-zA-Z0-9.]+\.\w{2,4}", info)
    for em in emaillist:
        list.append(em)
        print(list)

Sample output


['simonbrew@hotmail.com']
['samson@infoworld.com']
['brianjungman@gmail.com']
['sneakyg33ky@aol.com']
['inter0pt1c@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'inter0pt1c@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'inter0pt1c@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com']
['d4rktangent@gmail.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'd4rktangent@gmail.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'd4rktangent@gmail.com']
['sneakyg33ky@aol.com']
['mistersekritx@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'mistersekritx@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com']
['sneakyg33ky@aol.com', 'mistersekritx@aol.com']

The idea is, I want to find the emails, add them to a list, remove duplicates and then print them out in a nice table format.

This is all my code so far.


import dpkt,socket,datetime,geoip2.database,re,sys,urllib.request,urllib
from tabulate import tabulate
from collections import Counter
from prettytable import PrettyTable


def packet_type(pcap):
    ####https://stackoverflow.com/questions/18256342/parsing-a-pcap-file-in-python####
    other = []
    IP = []
    tcp = []
    udp = []
    igmp = []

    for ts, buf in pcap:
        # Unpack the Ethernet frame (mac src/dst, ethertype)
        eth = dpkt.ethernet.Ethernet(buf)
        #print(f'#<INFO> eth ethernet packet: {repr(eth)}')
        # ip address
        ip = eth.data
        # Extract TCP Payload
        TCP = ip.data
        info = repr(TCP)
        # read the source IP in dst
        src = socket.inet_ntoa(ip.src)
        # read the destination IP in dst
        dst = socket.inet_ntoa(ip.dst)
        try:
            if eth.type != dpkt.ethernet.ETH_TYPE_IP:
                other.append(src)
            IP.append(ip.len)
            if ip.p == dpkt.ip.IP_PROTO_IGMP:
                igmp.append(ip.len)
            elif ip.p == dpkt.ip.IP_PROTO_TCP:
                tcp.append(ip.len)
            elif ip.p == dpkt.ip.IP_PROTO_UDP:
                udp.append(ip.len)
        except Exception as err:
            print(f'Oh no there has been an {err}')
            continue
    timestamp(tcp,udp,igmp)



def timestamp(tcp,udp,igmp):
    tcp.sort()
    Tcp = len(tcp)
    TCP1st = tcp[0]
    TCP2nd = tcp[-1]
    TCPts = str(datetime.datetime.utcfromtimestamp(TCP1st))
    TCP2ts = str(datetime.datetime.utcfromtimestamp(TCP2nd))
    udp.sort()
    Udp = len(udp)
    UDP = udp[0]
    UDP2nd = udp[-1]
    UDPts = str(datetime.datetime.utcfromtimestamp(UDP))
    UDP2ts = str(datetime.datetime.utcfromtimestamp(UDP2nd))
    igmp.sort()
    Igmp = len(igmp)
    IGMP = igmp[0]
    IGMP2nd = igmp[-1]
    IGMPts = str(datetime.datetime.utcfromtimestamp(IGMP))
    IGMP2ts = str(datetime.datetime.utcfromtimestamp(IGMP2nd))
    mean_packet_length(tcp,udp,igmp,TCPts,TCP2ts,UDPts,UDP2ts,IGMPts,IGMP2ts,Tcp,Udp,Igmp)


def mean_packet_length(tcp,udp,igmp,TCPts,TCP2ts,UDPts,UDP2ts,IGMPts,IGMP2ts,Tcp,Udp,Igmp):
    tcpmean = sum(tcp) / len(tcp)
    tcp_mean = round(tcpmean)
    udpmean = sum(udp) / len(udp)
    udp_mean = round(udpmean)
    igmpmean = sum(igmp) / len(igmp)
    igmp_mean = round(igmpmean)
    tabulate_table(tcp_mean,udp_mean,igmp_mean,TCPts,TCP2ts,UDPts,UDP2ts,IGMPts,IGMP2ts,Tcp,Udp,Igmp)


def tabulate_table(tcp_mean,udp_mean,igmp_mean,TCPts,TCP2ts,UDPts,UDP2ts,IGMPts,IGMP2ts,Tcp,Udp,Igmp):
    table =[['TCP',Tcp,TCPts,TCP2ts,tcp_mean], ['UDP',Udp,UDPts, UDP2ts, udp_mean], ['IGMP',Igmp,IGMPts,IGMP2ts,igmp_mean]]
    headers = ['Protocol','Count', 'First_Timestamp', 'Last_Timestamp', 'Mean_Length']
    print(tabulate(table, headers, tablefmt='fancy_grid'))
    tcp()


def email_list(info):
    #print('[+] email addresses found: ')
    list = []
    emaillist = re.findall(r"[a-zA-Z0-9.]+@[a-zA-Z0-9.]+\.\w{2,4}", info)
    for em in emaillist:
        list.append(em)
        print(list)


def tcp():
    with open(r'C:\Users\snoopgrapes\Desktop\evidence-packet-analysis.pcap', 'rb') as pcapfile:
        pcap = dpkt.pcap.Reader(pcapfile)
        for ts, buf in pcap:

            # Unpack the Ethernet frame (mac src/dst, ethertype)
            eth = dpkt.ethernet.Ethernet(buf)
            #print(f'#<INFO> eth ethernet packet: {repr(eth)}')
            # ip address
            ip = eth.data
            # Extract TCP Payload
            TCP = ip.data
            info = repr(TCP)
            email_list(info)


def find_uri():
    found = False
    gif_uri = []
    with open(r'C:\Users\snoopgrapes\Desktop\evidence-packet-analysis.pcap', 'rb') as pcapfile:
        pcap = dpkt.pcap.Reader(pcapfile)
        for ts, buf in pcap:
            try:
                eth = dpkt.ethernet.Ethernet(buf)
                ip = eth.data
                tcp = ip.data
                http = dpkt.http.Request(tcp.data)
                if http.method == 'GET':
                    uri = http.uri.lower()
                    if '.gif' in uri:
                        gif_uri.append(uri)
                        found = True
            except Exception:
                pass
    print(f'Gif URI {gif_uri}')


def main():

    pcapFile = r'C:\Users\snoopgrapes\Desktop\evidence-packet-analysis.pcap'
    #pcapFile = r'C:\Users\snoopgrapes\Desktop\filtered2.pcap'
    #pcapFile = r'C:\Users\snoopgrapes\Desktop\filtered3.pcap'
    #pcapFile = r'C:\Users\snoopgrapes\Desktop\http.pcap'
    #pcapFile = r'C:\Users\snoopgrapes\Desktop\sampledata.pcap'
    #email = r'C:\Users\snoopgrapes\Desktop\email_sample.txt'
    excludesrc = '146.176.164.91'
    f = open(pcapFile, 'rb')
    pcap = dpkt.pcap.Reader(f)
    reader = geoip2.database.Reader('C:\Program Files\Python39\Geo\Geo.mmdb')
    print(f'[*] analysing {pcapFile} for packets not source {excludesrc}')
    print('------------------------------------------------------------')
    packet_type(pcap)


if __name__ == '__main__':
    main()

Thank you so much for any help

2 Answers2

0
list = []
emaillist = re.findall(r"[a-zA-Z0-9.]+@[a-zA-Z0-9.]+\.\w{2,4}", info)
for em in emaillist:
    list.append(em)
    print(list)

Basically you get a list of all emails in emaillist. Then you iterate over this emaillist and add each of the elements to list. But within each of the iterations you print everything which was collected so far, including values you've already printed in the previous iteration. What you likely tried to do is this (note the different indentation):

list = []
emaillist = re.findall(r"[a-zA-Z0-9.]+@[a-zA-Z0-9.]+\.\w{2,4}", info)
for em in emaillist:
    list.append(em)
print(list)

Or even more simple

list = re.findall(r"[a-zA-Z0-9.]+@[a-zA-Z0-9.]+\.\w{2,4}", info)
print(list)
Steffen Ullrich
  • 114,247
  • 10
  • 131
  • 172
  • thanks. I have tried the ways suggested and this is what happens.... It looks like every time it adds an item it removes the previous item..... `[] [] ['sneakyg33ky@aol.com'] [] [] [] [] [] [] [] [] [] ['sneakyg33ky@aol.com', 'mistersekritx@aol.com'] [] [] [] [] []` – snoopstargrapes Jun 07 '21 at 13:23
  • @snoopstargrapes: I have no idea what exact code produced what you show in your comment. – Steffen Ullrich Jun 07 '21 at 14:20
-1
def data(timestamp, ip):

    UDP, TCP = [], []
    UDP_Total, TCP_Total = 0, 0

    for timestamp, eth in zip(Array_Of_TimeStamp, Array_Of_IP):

        if eth.p == dpkt.ip.IP_PROTO_TCP:
            TCP_Info.append(eth)
            TCP_TimeStamp_Info.append(timestamp)
            TCP_Total += 1
            TCP_len.append(len(TCP_Info))

        if eth.p == dpkt.ip.IP_PROTO_UDP:
            UDP_Info.append(eth)
            UDP_TimeStamp_Info.append(timestamp)
            UDP_Total += 1
            UDP_Len.append(len(UDP_Info))

    if TCP_TimeStamp_Info != 0:
        print(f"The Total amount of TCP Packets: {TCP_Total}")
        print(f"This is the First TCP Packet That was found: {datetime.datetime.fromtimestamp(TCP_TimeStamp_Info[0])}")
        print(f"This is the Last TCP Packet That was found: {datetime.datetime.fromtimestamp(TCP_TimeStamp_Info[-1])}")
        print(f"This Is The Legth of the packet: {mean(TCP_len)} \n")
        
Matthias Urlichs
  • 2,301
  • 19
  • 29
morph
  • 1
  • 1
  • Your answer could be improved with additional supporting information. Please [edit] to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Dec 07 '22 at 22:33
  • It could also be improved by actually testing it. Don't compare arrays with numbers. Initialize your actual arrays, not random other variables. – Matthias Urlichs Dec 09 '22 at 18:05