I'm trying to convert the solution given here from PyQt4 to PyQt5 as an exercise.
Somehow the collected html code is being lost on the way. I put some print()
on the methods to understand what is happening. The print()
of Callable
method shows the HTML codes. However, when in the handleLoadFinished
method it's None
and, consequently, the functions funA
and funcB
have not to work on.
The code I'm working is:
import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt5 import QtCore, QtGui
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
class WebPage(QWebPage):
def __init__(self):
QWebPage.__init__(self)
self.loadFinished.connect(self.handleLoadFinished)
def process(self, items):
self._items = iter(items)
self.fetchNext()
def fetchNext(self):
try:
self._url, self._func = next(self._items)
self.load(QtCore.QUrl(self._url))
except StopIteration:
return False
return True
def handleLoadFinished(self):
A = self.toHtml(self.Callable)
print('\n\n\n\n\n')
print("####################### handleLoadFinished: ", A)
self._func(self._url, self.toHtml(self.Callable))
if not self.fetchNext():
print('# processing complete')
#self._exit()
def Callable(self, html_str):
self.html = html_str
print('#################### Callable html:', self.html)
def _exit(self):
print("exiting...")
QApplication.instance().quit()
def funcA(url, html):
print('# processing:', url)
print('html:', html)
soup = BeautifulSoup(html, "html.parser")
# do stuff with soup...
def funcB(url, html):
print('# processing:', url)
print('html:', html)
soup = BeautifulSoup(UnicodeDammit(html).unicode_markup)
# do stuff with soup...
items = [
('http://stackoverflow.com', funcA),
('http://google.com', funcB),
]
signal.signal(signal.SIGINT, signal.SIG_DFL)
print('Press Ctrl+C to quit\n')
app = QApplication(sys.argv)
webpage = WebPage()
webpage.process(items)
sys.exit(app.exec_())
Any suggestions to help me understand and correct it will be appreciated!