I'm trying to write a web scraper using PyQt5 and multithreading so that I can scrape multiple URLs in parallel. (I'm aware of this: Scrape multiple urls using QWebPage — but I really want to write a parallel version, and I can't see why mine doesn't work.) I've written this code:
python
import sys
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtWebEngineWidgets import QWebEnginePage
import time
urlb = "https://www.google.fr/"
class Worker(QRunnable, QWebEnginePage):
    """Runnable that loads ``url`` in a QWebEnginePage and stores its HTML.

    NOTE(review): QWebEnginePage is a QObject with GUI/event-loop affinity;
    constructing it inside ``run()`` on a QThreadPool worker thread is
    unsupported by Qt, and its ``loadFinished`` signal will never be
    delivered there (no event loop runs in that thread). This is the likely
    reason only 'a', 'b', 'c' are printed — TODO: confirm and restructure so
    pages live in the main (GUI) thread.
    """

    def __init__(self, url):
        # Initialize only the QRunnable side here; QWebEnginePage.__init__
        # is (problematically) deferred until run().
        super(Worker, self).__init__()
        self.url = url

    def _on_load_finished(self):
        print("tfouuu")
        # toHtml() is asynchronous: it returns None and later delivers the
        # HTML string to the callback, which overwrites self.html.
        self.html = self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        # Receives the page HTML from the asynchronous toHtml() call.
        self.html = html_str

    @pyqtSlot()
    def run(self):
        print("a")
        time.sleep(2)
        print(self.url)
        print("b")
        QWebEnginePage.__init__(self)
        print("c")
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        # Bug fix: the original called QUrl(url), referencing an undefined
        # global name `url` (NameError); the instance attribute is intended.
        self.load(QUrl(self.url))
        print("d")
class MainWindow(QMainWindow):
    """Main window that fans two scrape workers out onto a QThreadPool."""

    def __init__(self, *args, **kwargs):
        # Fix: call the Qt base __init__ FIRST. PyQt QObject subclasses must
        # initialize the underlying C++ object before the wrapper is used;
        # the original created the thread pool before doing so.
        super(MainWindow, self).__init__(*args, **kwargs)
        self.threadpool = QThreadPool()
        print("Multithreading with maximum %d threads" % self.threadpool.maxThreadCount())
        # QThreadPool.start() takes ownership of the runnables (autoDelete
        # is True by default), so locals are fine here.
        worker = Worker(urlb)
        worker2 = Worker(urlb)
        self.threadpool.start(worker)
        self.threadpool.start(worker2)
if __name__ == "__main__":
    # Guard the entry point so the module can be imported without side
    # effects, and propagate the event loop's exit status. app.exec_()
    # blocks until quit() is called or the last window closes — without
    # returning its result the script appears to "run forever".
    app = QApplication([])
    window = MainWindow()
    sys.exit(app.exec_())
But I have two problems:

1. My code keeps running without ever stopping (I guess this has to do with a missing app.quit() call, but I don't know where to put it).
2. More importantly, my code prints only 'a', 'b', and 'c' — it never reaches the connect-and-load part.