2

I'm trying to convert the solution given here from PyQt4 to PyQt5 as an exercise.

Somehow the collected HTML is being lost along the way. I added some print() calls to the methods to understand what is happening. The print() in the Callable method shows the HTML. However, in the handleLoadFinished method the value is None and, consequently, the functions funcA and funcB have nothing to work on.

The code I'm working with is:

import sys, signal
from bs4        import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt5           import QtCore, QtGui
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage

class WebPage(QWebPage):
  """Page that loads queued URLs one at a time and calls a function per page.

  NOTE(review): handleLoadFinished uses the *return value* of toHtml(). The
  surrounding discussion reports that this value is None, while the markup
  only shows up inside the Callable() callback, so the per-URL functions
  never receive the HTML.
  """

  def __init__(self):
    QWebPage.__init__(self)
    # Run handleLoadFinished every time a page load completes.
    self.loadFinished.connect(self.handleLoadFinished)

  def process(self, items):
    """Store an iterator over (url, func) pairs and start the first load."""
    self._items = iter(items)
    self.fetchNext()

  def fetchNext(self):
    """Load the next queued URL.

    Returns True when a load was started, False when the queue is exhausted.
    """
    try:
        self._url, self._func = next(self._items)
        self.load(QtCore.QUrl(self._url))
    except StopIteration:
        return False
    return True

  def handleLoadFinished(self):
    """Slot for loadFinished: pass the page to its function, then move on."""
    # toHtml() is given a callback; its return value is what gets printed
    # below (reported as None in the discussion).
    A = self.toHtml(self.Callable)
    print('\n\n\n\n\n')
    print("####################### handleLoadFinished: ", A)
    # Second toHtml() request; again its return value, not the markup
    # delivered to Callable, is what self._func receives.
    self._func(self._url, self.toHtml(self.Callable))
    if not self.fetchNext():
        print('# processing complete')
        #self._exit()

  def Callable(self, html_str):
    """Callback handed to toHtml(): remember and print the received markup."""
    self.html = html_str
    print('####################  Callable html:', self.html)

  def _exit(self):
    """Print a message and quit the running QApplication."""
    print("exiting...")
    QApplication.instance().quit()

def funcA(url, html):
  """Echo the page URL and markup, then parse the markup with ``html.parser``."""
  print('# processing:', url)
  print('html:', html)
  soup = BeautifulSoup(markup=html, features="html.parser")
  # do stuff with soup...

def funcB(url, html):
  """Echo the page URL and markup, then parse the Unicode-normalised markup.

  ``html`` is run through UnicodeDammit first and the resulting
  ``unicode_markup`` is what gets parsed.
  """
  print('# processing:', url)
  print('html:', html)
  # Name the parser explicitly: without it Beautiful Soup picks whichever
  # parser happens to be installed (and warns about guessing), so results
  # could differ between machines and from funcA.
  soup = BeautifulSoup(UnicodeDammit(html).unicode_markup, "html.parser")
  # do stuff with soup...

# Work queue of (url, callback) pairs; WebPage.fetchNext unpacks each pair
# into self._url / self._func.
items = [
          ('http://stackoverflow.com', funcA),
          ('http://google.com', funcB),
        ]

# Reset SIGINT to its default action so that Ctrl+C terminates the script.
signal.signal(signal.SIGINT, signal.SIG_DFL)
print('Press Ctrl+C to quit\n')
# The application object is created before the page that depends on it.
app     = QApplication(sys.argv)
webpage = WebPage()
# Starts the first load; further loads are chained from handleLoadFinished.
webpage.process(items)
# Run the Qt event loop and use its result as the process exit status.
sys.exit(app.exec_())

Any suggestions to help me understand and correct it will be appreciated!

Dknow75
  • 23
  • 4

1 Answer

0

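Obtaining the HTML in QtWebEngine is asynchronous: toHtml() returns immediately (which is why you get None) and delivers the HTML later to the callback you pass to it. So, instead of using its return value, pass "self._func" as the callback, wrapped with functools.partial() to bind the url: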

from functools import partial
import signal
import sys

from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit

from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage


class WebPage(QWebPage):
    """Load a queue of URLs sequentially and hand each page's HTML to a callback.

    Each queue item is a ``(url, func)`` pair; once the page for ``url`` has
    loaded and its HTML has been retrieved, ``func(url, html)`` is called.
    The next URL is only requested after that call, so a page is never
    navigated away from while its HTML is still being fetched.
    """

    def __init__(self):
        super().__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start processing an iterable of ``(url, func)`` pairs."""
        self._items = iter(items)
        self.fetchNext()

    def fetchNext(self):
        """Begin loading the next queued URL.

        Returns:
            True if a load was started, False if the queue is exhausted.
        """
        # Only next() is guarded, so a StopIteration raised elsewhere is
        # not mistaken for an empty queue.
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.load(QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Slot for ``loadFinished``: request the HTML of the loaded page."""
        # toHtml() is asynchronous: it returns at once and invokes the
        # callback later. Bind the current url/func now so the callback is
        # paired with the page that was actually loaded.
        self.toHtml(partial(self._handleHtml, self._url, self._func))

    def _handleHtml(self, url, func, html):
        """Deliver ``html`` to ``func`` and only then move on to the next URL."""
        func(url, html)
        # Advancing here, not in handleLoadFinished, keeps the current page
        # in place until its HTML has been delivered.
        if not self.fetchNext():
            print("# processing complete")


def funcA(url, html):
    """Echo the page URL and markup, then parse the markup with ``html.parser``."""
    print("# processing:", url)
    print("html:", html)
    soup = BeautifulSoup(markup=html, features="html.parser")


def funcB(url, html):
    """Echo the page URL and markup, then parse the Unicode-normalised markup.

    ``html`` is run through UnicodeDammit first and the resulting
    ``unicode_markup`` is what gets parsed.
    """
    print("# processing:", url)
    print("html:", html)
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed (and warns about guessing), so results
    # could differ between machines and from funcA.
    soup = BeautifulSoup(UnicodeDammit(html).unicode_markup, "html.parser")


# Work queue of (url, callback) pairs; each callback is invoked as
# callback(url, html) once the page's HTML is available.
items = [
    ("http://stackoverflow.com", funcA),
    ("http://google.com", funcB),
]


def main():
    """Create the Qt application, queue ``items`` on a WebPage and run the event loop."""
    # Reset SIGINT to its default action so that Ctrl+C terminates the script.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print("Press Ctrl+C to quit\n")

    application = QApplication(sys.argv)
    page = WebPage()
    page.process(items)

    # Block in the event loop, then exit with its status code.
    exit_code = application.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
eyllanesc
  • 235,170
  • 19
  • 170
  • 241