Cant receive certificates in a python webcrawler

Question

I'm developing a WebCrawler, but Im having a lot of issues reading the certificate from the server which I am connecting to, I just need to read the certificate and print it on the screen, but using the getpeercert() function the certificate is always {}, and I tried to use get_server_certificate(), which returns me an error saying that there are too many values to unpack. I know that there are some errors in the threads also but I am going to fix that later. Thanks in advance already.

The issue is on the 'acesso' function

#coding: utf8
import socket
import sys
import re
import ssl
import pprint
from threading import Thread
from urlparse import urlparse


threads = []
vetorLinks = []
linksVisitados = []
nThreads = 4 #numero de threads

def acesso(url):
    print "Acessando: " + url
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    ssl_socket = ssl.wrap_socket(s)

    urlParsed = urlparse(url)

    try:
        ssl_socket.connect((urlParsed[1], 443))
    except  socket.timeout:
        print "Time out"

    ssl.get_server_certificate(urlParsed[1])

    pprint.pprint(ssl_socket.getpeercert())

    request = "GET " + urlParsed[2] + " HTTP/1.1\r\nUser-Agent: Python\r\nHost: " +     urlParsed[1] + "\r\nConnection: persistent\r\n\r\n"

    ssl_socket.sendall(request)
    dados = ''
    try:
        while(True):
            buff = ssl_socket.recv(4096)
            if not len(buff):
                break
            dados += buff
    except socket.timeout:
        print "Time Out"

    ssl_socket.close()
    return dados


def navega(url, profundidade, vetorLinks, visitados):
    if not url in visitados and (url.startswith("http://") or     url.startswith("https://")):
        visitados.append(url) #salva a url no vetor dos visitados

        html = acesso(url) #html da pagina lida

        urls = re.findall(r"""<a href=[\'"]?([^\'" >]+)""", html)

        vetorLinks.extend(urls)     
        if profundidade==0:
            return
        else:
            try:
                for i in range(0, len(vetorLinks)):
                    for j in range(0, nThreads):
                        t = Thread(target = navega, args = (vetorLinks[i], profundidade-    1, vetorLinks, linksVisitados))
                        t.start()
                        threads.append(t)
                        i += 1
                    for t in threads:
                        t.join()
            except RuntimeError:
                print "RuntimeError nas Threads."

navega(sys.argv[2], int(sys.argv[1]), vetorLinks, linksVisitados)

possible duplicate of [How can I retrieve the SSL certificate information for a connection](http://stackoverflow.com/questions/7689941/how-can-i-retrieve-the-ssl-certificate-information-for-a-connection) — jww, Jul 07 '14 at 05:49
No big deal. Check out the "related" questions when you are composing your question. Sometimes they offer helpful results. Anyway, at least you asked a programming question. +1. — jww, Jul 07 '14 at 22:33

Cant receive certificates in a python webcrawler

0 Answers0