I am trying to extract an HTML table, but the script raises an error and I have no idea why.
I would really appreciate some help here.
Code:
from bs4 import BeautifulSoup
from io import BytesIO
import requests
import datetime
import re
import rows
# date = datetime.datetime.strptime("2013-1-25", '%Y-%m-%d').strftime('%m/%d/%y')
# Download the Mega-Sena results page, keep only the rows of its first
# <table>, rebuild a minimal HTML table from them and hand it to `rows`.
url = 'http://www1.caixa.gov.br/loterias/_arquivos/loterias/D_MEGA.HTM'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content

soup = BeautifulSoup(html, 'lxml')
tabela = soup.find("table")
if tabela is None:
    # Without this check the next line fails with an opaque AttributeError.
    raise ValueError(f"no <table> element found at {url}")

# Blank out every table nested inside the first one, so the row search
# below only sees rows that belong to the outer table.
for tag in tabela.find_all('table'):
    tag.replace_with('')

soup_tr = tabela.find_all("tr")
lista_tr = list(soup_tr)
# Replace the first row with the second one (the second row then appears
# twice in the rebuilt table).
# NOTE(review): presumably the first row is not a usable header - confirm.
lista_tr[0] = lista_tr[1]

s = "".join(str(row_tag) for row_tag in lista_tr)
s = "<table>" + s + "</table>"
# Strip HTML comments, including ones that span several lines.
s = re.sub(r"<!--.*?-->", "", s, flags=re.DOTALL)

# NOTE(review): the reported traceback fails earlier, at `import rows`,
# because the installed `rows` release calls HTMLParser().unescape, which
# Python 3.9 removed in favour of html.unescape. Presumably a newer `rows`
# release (or an older Python) is needed before this line is reached - verify.
table = rows.import_from_html(BytesIO(s.encode('utf-8')))
Error output (traceback) below:
File "C:\Users\atendimentopcp300_01\Desktop\Antony\Blue Challenge\megasena.py", line 6, in <module>
import rows
File "C:\Users\atendimentopcp300_01\Desktop\Antony\Blue Challenge\venv\lib\site-packages\rows\__init__.py", line 22, in <module>
import rows.plugins as plugins
File "C:\Users\atendimentopcp300_01\Desktop\Antony\Blue Challenge\venv\lib\site-packages\rows\plugins\__init__.py", line 24, in <module>
from . import plugin_html as html
File "C:\Users\atendimentopcp300_01\Desktop\Antony\Blue Challenge\venv\lib\site-packages\rows\plugins\plugin_html.py", line 43, in <module>
unescape = HTMLParser().unescape
AttributeError: 'HTMLParser' object has no attribute 'unescape'