I'd like to continue my last related thread in my attempt to understand and build a BitTorrent search engine. While listening the network for "get_peers" messages, I manage to grab infohashes. I proceed to ask the corresponding DHT node for it's peers. In my understanding in order to find out if the infohash is valid, (for starters) I have to send a BitTorrent handshake to the peers and compare the responses. However, besides the connection refused errors which I ignore for now, most peers reply with empty responses. Am I doing something wrong here? Note that the following code samples are not a great implementation, I just want to understand the flow.
Handshake function:
import socket
def handshake(infohash, peer):
peer_id = b"-TR2940-k8hj0wgej6ch"
handshake = b'\x13'
handshake += b'BitTorrent protocol'
handshake += b'\x00\x00\x00\x00\x00\x10\x00\x00'
handshake += infohash
handshake += peer_id
try:
ClientSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
ClientSocket.settimeout(3)
ClientSocket.connect(peer)
print("Connected to peer.")
ClientSocket.sendall(handshake)
response = ClientSocket.recv(68)
if not response:
print("Empty response.")
return
print(f"Handshake completed, resp: {response}")
ClientSocket.close()
except Exception as e:
print(e)
Utilities to get peers from given infohash and DHT node:
import random
import uuid
import bencode
import socket
from struct import unpack
import handshake
def newTID(tidlen):
tid = ""
for i in range(0, tidlen):
tid += chr(random.randint(97, 122))
return tid
def newID():
return uuid.uuid4().hex[0:20]
def split_nodes(nodes):
length = len(nodes)
if (length % 26) != 0:
return
for i in range(0, length, 26):
nid = nodes[i:i+20]
ip = socket.inet_ntoa(nodes[i+20:i+24])
port = unpack("!H", nodes[i+24:i+26])[0]
yield nid, ip, port
UDPClientSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDPClientSocket.settimeout(4)
def get_peers_from_infohash(infohash, node):
get_peers_query = {"t":"aa", "y":"q", "q":"get_peers", "a": {"id":newID(), "info_hash":infohash}}
get_peers_query = bencode.encode(get_peers_query)
UDPClientSocket.sendto(get_peers_query, node)
received = UDPClientSocket.recvfrom(65536)
msg = received[0]
decoded = bencode.decode(msg)
peers = split_nodes(decoded["r"]["nodes"])
for nid, ip, port in peers:
print(infohash, infohash.hex(), ip, port)
handshake.handshake(infohash, (ip, port))
My DHT crawler:
import bencode
import socket
import uuid
from struct import unpack
import threading
import random
import dhtutils
def newTID(tidlen):
tid = ""
for i in range(0, tidlen):
tid += chr(random.randint(97, 122))
return tid
def newID():
return uuid.uuid4().hex[0:20]
def handle_message(msg, node):
if msg.get("e"):
# print(msg.get("e"))
pass
elif msg.get("y") == "r":
handle_response(msg, node)
elif msg.get("y") == "q":
handle_query(msg, node)
def handle_query(msg, node):
try:
if msg["q"] == "get_peers":
infohash = msg["a"]["info_hash"]
# print(infohash.hex(), msg, node)
print(infohash.hex())
dhtutils.get_peers_from_infohash(infohash, node)
except:
pass
def handle_response(msg, node):
global all_nodes
if msg.get("r").get("nodes"):
# response from find_nodes
nodes = msg.get("r").get("nodes")
if nodes:
nodes = split_nodes(nodes)
for id, ip, port in nodes:
find_nodes(id, (ip, port))
all_nodes.append((id, (ip, port)))
elif msg.get("t") == "pg":
# response from ping
id = msg["r"]["id"]
all_nodes.append((id, node))
def split_nodes(nodes):
length = len(nodes)
if (length % 26) != 0:
return
for i in range(0, length, 26):
nid = nodes[i:i+20]
ip = socket.inet_ntoa(nodes[i+20:i+24])
port = unpack("!H", nodes[i+24:i+26])[0]
yield nid, ip, port
def find_nodes(id, node):
global UDPClientSocket
find_node_query = {"t":newTID(2), "y":"q", "q":"find_node", "a": {"id":newID(), "target":id}}
find_node_query = bencode.encode(find_node_query)
UDPClientSocket.sendto(find_node_query, node)
def ping(node):
global UDPClientSocket
ping_query = {"t":"pg", "y":"q", "q":"ping", "a":{"id":newID()}}
ping_query = bencode.encode(ping_query)
UDPClientSocket.sendto(ping_query, node)
def listen():
while True:
try:
received = UDPClientSocket.recvfrom(65536)
msg = received[0]
src = received[1]
decoded = bencode.decode(msg)
handle_message(decoded, src)
except Exception as e:
pass
UDPClientSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
T = threading.Thread(target=listen)
T.start()
nodes = [
("router.bittorrent.com", 6881),
("dht.transmissionbt.com", 6881),
("router.utorrent.com", 6881)
]
for node in nodes:
ping(node)
all_nodes = []
while True:
if len(all_nodes) > 0:
for node in all_nodes:
find_nodes(node[0], node[1])