I'm developing a WebCrawler, but Im having a lot of issues reading the certificate from the server which I am connecting to, I just need to read the certificate and print it on the screen, but using the getpeercert() function the certificate is always {}, and I tried to use get_server_certificate(), which returns me an error saying that there are too many values to unpack. I know that there are some errors in the threads also but I am going to fix that later. Thanks in advance already.
The issue is on the 'acesso' function
#coding: utf8
import socket
import sys
import re
import ssl
import pprint
from threading import Thread
from urlparse import urlparse
threads = []
vetorLinks = []
linksVisitados = []
nThreads = 4 #numero de threads
def acesso(url):
print "Acessando: " + url
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
ssl_socket = ssl.wrap_socket(s)
urlParsed = urlparse(url)
try:
ssl_socket.connect((urlParsed[1], 443))
except socket.timeout:
print "Time out"
ssl.get_server_certificate(urlParsed[1])
pprint.pprint(ssl_socket.getpeercert())
request = "GET " + urlParsed[2] + " HTTP/1.1\r\nUser-Agent: Python\r\nHost: " + urlParsed[1] + "\r\nConnection: persistent\r\n\r\n"
ssl_socket.sendall(request)
dados = ''
try:
while(True):
buff = ssl_socket.recv(4096)
if not len(buff):
break
dados += buff
except socket.timeout:
print "Time Out"
ssl_socket.close()
return dados
def navega(url, profundidade, vetorLinks, visitados):
if not url in visitados and (url.startswith("http://") or url.startswith("https://")):
visitados.append(url) #salva a url no vetor dos visitados
html = acesso(url) #html da pagina lida
urls = re.findall(r"""<a href=[\'"]?([^\'" >]+)""", html)
vetorLinks.extend(urls)
if profundidade==0:
return
else:
try:
for i in range(0, len(vetorLinks)):
for j in range(0, nThreads):
t = Thread(target = navega, args = (vetorLinks[i], profundidade- 1, vetorLinks, linksVisitados))
t.start()
threads.append(t)
i += 1
for t in threads:
t.join()
except RuntimeError:
print "RuntimeError nas Threads."
navega(sys.argv[2], int(sys.argv[1]), vetorLinks, linksVisitados)