I am comparing scapy and dpkt in terms of speed. I have a directory with pcap files which I parse and count the http requests in each file. Here's the scapy code :
import time
from scapy.all import *
def parse(f):
x = 0
pcap = rdpcap(f)
for p in pcap:
try:
if p.haslayer(TCP) and p.getlayer(TCP).dport == 80 and p.haslayer(Raw):
x = x + 1
except:
continue
print x
if __name__ == '__main__':\
path = '/home/pcaps'
start = time.time()
for file in os.listdir(path):
current = os.path.join(path, file)
print current
f = open(current)
parse(f)
f.close()
end = time.time()
print (end - start)
The script is really slow (it gets stuck after a few minutes) compared to the dpkt version :
import dpkt
import time
from os import walk
import os
import sys
def parse(f):
x = 0
try:
pcap = dpkt.pcap.Reader(f)
except:
print "Invalid Header"
return
for ts, buf in pcap:
try:
eth = dpkt.ethernet.Ethernet(buf)
except:
continue
if eth.type != 2048:
continue
try:
ip = eth.data
except:
continue
if ip.p == 6:
if type(eth.data) == dpkt.ip.IP:
tcp = ip.data
if tcp.dport == 80:
try:
http = dpkt.http.Request(tcp.data)
x = x+1
except:
continue
print x
if __name__ == '__main__':
path = '/home/pcaps'
start = time.time()
for file in os.listdir(path):
current = os.path.join(path, file)
print current
f = open(current)
parse(f)
f.close()
end = time.time()
print (end - start)
So it there something wrong with the way I am using scapy? Or is it just that scapy is slower than dpkt?