0

I am trying to extract a series of Urls from a few links. I managed to get the code below to extract the link I needed from one of the pages but when i have it iterate through my list of links it produces the error I mentioned in the title of my post. I originally saw a post here PyQt: RuntimeError: wrapped C/C++ object has been deleted and in that the person linked to the https://doc.qt.io/archives/qt-4.8/qstackedwidget.html Qtstacked widget. I checked the documentation but had a slight issue understanding how it would apply in my case and as such I just need assistance in modifying code to fit my program.

from bs4 import BeautifulSoup  
import sys 
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, pyqtSignal , QEventLoop
from PyQt5.QtWebEngineWidgets import QWebEnginePage

class Client(QWebEnginePage):
    toHtmlFinished = pyqtSignal()

    def __init__(self, url):
        self.app = QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.loadFinished.connect(self.on_page_load)
        self.load(QUrl(url))
        self.app.exec_()

    def on_page_load(self):
        self.app.quit()

    def store_html(self, html):
        self.html = html
        self.toHtmlFinished.emit()

    def get_html(self):
        self.toHtml(self.store_html)
        loop = QEventLoop()
        self.toHtmlFinished.connect(loop.quit)
        loop.exec_()
        return self.html
link_list = ['http://quotes.toscrape.com/author/Albert-Einstein/','http://quotes.toscrape.com/author/J-K-Rowling/','http://quotes.toscrape.com/author/Jane-Austen/']

for links in link_list:
    client_response = Client(links)
    source = client_response.get_html()
    soup = BeautifulSoup(source,'lxml')
    med_url = soup.find('div',class_ = 'author-title')
    print(med_url)

when I run it i get this:

wrapped C/C++ object of type Client has been deleted
  File "C:\Python\New folder\Linkextractor.py", line 27, in get_html
    self.toHtml(self.store_html)
  File "C:\Python\New folder\Linkextractor.py", line 41, in <module>
    source = client_response.get_html()

0 Answers0