I am trying to extract a series of Urls from a few links. I managed to get the code below to extract the link I needed from one of the pages but when i have it iterate through my list of links it produces the error I mentioned in the title of my post. I originally saw a post here PyQt: RuntimeError: wrapped C/C++ object has been deleted and in that the person linked to the https://doc.qt.io/archives/qt-4.8/qstackedwidget.html Qtstacked widget. I checked the documentation but had a slight issue understanding how it would apply in my case and as such I just need assistance in modifying code to fit my program.
from bs4 import BeautifulSoup
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, pyqtSignal , QEventLoop
from PyQt5.QtWebEngineWidgets import QWebEnginePage
class Client(QWebEnginePage):
toHtmlFinished = pyqtSignal()
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.loadFinished.connect(self.on_page_load)
self.load(QUrl(url))
self.app.exec_()
def on_page_load(self):
self.app.quit()
def store_html(self, html):
self.html = html
self.toHtmlFinished.emit()
def get_html(self):
self.toHtml(self.store_html)
loop = QEventLoop()
self.toHtmlFinished.connect(loop.quit)
loop.exec_()
return self.html
link_list = ['http://quotes.toscrape.com/author/Albert-Einstein/','http://quotes.toscrape.com/author/J-K-Rowling/','http://quotes.toscrape.com/author/Jane-Austen/']
for links in link_list:
client_response = Client(links)
source = client_response.get_html()
soup = BeautifulSoup(source,'lxml')
med_url = soup.find('div',class_ = 'author-title')
print(med_url)
when I run it i get this:
wrapped C/C++ object of type Client has been deleted
File "C:\Python\New folder\Linkextractor.py", line 27, in get_html
self.toHtml(self.store_html)
File "C:\Python\New folder\Linkextractor.py", line 41, in <module>
source = client_response.get_html()