0

I'm trying use python to download a file from a site, which can only be downloaded after you login, which seems to be working OK! But when I actually try to download the file, I only receive a text file saying I must log in. I believe I need to get the PHPSESSID cookie and use it, but can't figure out how to do it. Here is my code:

from BeautifulSoup import BeautifulSoup
import re
import requests
import sys

class LegendasTV(object):
    URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'

    URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'

    URL_LOGIN = 'http://legendas.tv/login'

    def __init__(self, usuario, senha):
        self.usuario = usuario
        self.senha = senha
        self.cookie = None

        self._login()

    def _login(self):
        s = requests.Session()
        url = self.URL_LOGIN
        payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
        r = s.post(url, payload)
        html = r.content

        if "<title>Login - Legendas TV</title>" in html:
            return 0
        else:
            print 'Success login!'
            return 1

    def _request(self, url, method='GET', data=None):
        if method == 'GET':
            r = requests.get(url, stream=True)
        if method == 'POST' and data:
            r = requests.post(url, data=data)

        return r

    def search(self, q, lang='pt-br', tipo='release'):
        if not q:
           pass # raise exception

        if not lang or not self.LEGENDA_LANG.get(lang):
           pass # raise exception

        if not tipo or not self.LEGENDA_TIPO.get(tipo):
           pass # raise exception

        busca = { 'txtLegenda': q,
                  'int_idioma': self.LEGENDA_LANG[lang],
                  'selTipo':    self.LEGENDA_TIPO[tipo] }

        r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
        if r:
            legendas = self._parser(r.text)
        else: 
            pass # raise exception

        return legendas

    def _parser(self, data):
        legendas = []

        html = BeautifulSoup(data)
        results = html.findAll("a")
        for result in results:
            if result.get("href") is not None and "S09E16" in result.get("href"):
                path_href = result.get("href").split("/")
                unique_id_download = path_href[2]
                url = self.URL_DOWNLOAD % unique_id_download

    def download(self, url_da_legenda):
        r = self._request(url_da_legenda)
        if r:
            with open("teste.rar", 'wb') as handle:
                print u'Baixando legenda:', url_da_legenda
                handle.write(r.content)

and here is how I am trying to use the code to download one file:

$ python
Python 2.7.6 (default, Jun 22 2015, 17:58:13) 
[GCC 4.8.2] on linux2
Type "help", "copyright", "credits" or "license" for more information. 
>>> 
>>> from download_legenda import *
>>> legendas_tv = LegendasTV("Login", "Pass")
Success login!
>>> 
>>> legendas_tv.download("http://legendas.tv/downloadarquivo/56c76ce239291")
Baixando legenda: http://legendas.tv/downloadarquivo/56c76ce239291
>>>

I would appreciate any help.

Vini.g.fer
  • 11,639
  • 16
  • 61
  • 90

1 Answers1

0

With the help of this answer I finally figured it out!

https://stackoverflow.com/a/12737874/1718174

I was trying to use cookies directly, but seems session already does the heavy-lifting part and take care of that for us. Below are the parts that needed to be updated on my code:

def _login(self):
    s = requests.Session()
    url = self.URL_LOGIN
    payload = {'data[User][username]': self.usuario, 'data[User][password]': self.senha, "data[lembrar]": "on"}
    r = s.post(url, payload)
    html = r.content

    if "<title>Login - Legendas TV</title>" in html:
        return 0
    else:
        print 'Success on login!'
        self.session = s

        return 1

def _request(self, url, method='GET', data=None):
    if self.session:
        if method == 'GET':
            r = self.session.get(url, cookies=self.cookie, stream=True)
        if method == 'POST' and data:
            r = self.session.post(url, data=data, cookies=self.cookie)

        return r
Community
  • 1
  • 1
Vini.g.fer
  • 11,639
  • 16
  • 61
  • 90