I am really new to Python and am trying to scrape some data from a JavaScript-rendered web page, using the second example from the linked post. When I apply this code in a for loop, it returns only 2 results from a list of 50 items and then fails with the error "Process finished with exit code -1073740940 (0xC0000374)".
Can anyone explain the reason, please?
My sample is here:
class Page(QWebEnginePage):
    """Load one URL in a QWebEnginePage and expose the rendered HTML.

    Constructing an instance blocks: it starts the load, runs the Qt event
    loop, and returns once the HTML has been handed to ``Callable``. The
    markup is then available as ``self.html``.
    """

    def __init__(self, url):
        """Load *url* and block until its HTML is stored in ``self.html``."""
        # Qt supports only one QApplication per process. Reuse the existing
        # instance so Page can be constructed repeatedly (e.g. in a loop)
        # instead of creating a second application object each time.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page HTML once loading has finished."""
        # toHtml() delivers the markup to the callback rather than returning
        # it, so its result must not be assigned to self.html.
        self.toHtml(self.Callable)

    def Callable(self, html_str):
        """Store the delivered HTML and leave the event loop run by __init__."""
        self.html = html_str
        self.app.quit()
def main():
    """Print the text lines of the 'tablo_dual_board' div for each URL.

    Iterates over the module-level ``linklist``, renders every URL with
    ``Page`` and prints the div's text split into lines. URLs whose page
    does not contain the div are reported on stderr and skipped.
    """
    # linklist is only read here, so no ``global`` declaration is needed.
    for iurl in linklist:
        page = Page(iurl)
        soup = bs.BeautifulSoup(page.html, 'html.parser')
        board = soup.find('div', class_='tablo_dual_board')
        if board is None:
            # find() returns None when nothing matches; skip this URL
            # instead of failing on ``None.text`` and losing the rest.
            print("no 'tablo_dual_board' div found at {}".format(iurl),
                  file=sys.stderr)
            continue
        print(board.text.splitlines())
I've also tried this one, and it gives a result only for the first list item. Is there any way other than these to apply a function to the items of a list?
# Attempt to run main() once for every entry of linklist.
# NOTE(review): main() takes no parameters and loops over linklist itself,
# so the current iurl is never passed to it; main() also has no return
# statement, so this assignment rebinds iurl to None.
for iurl in linklist:
iurl=main()
My whole code is here:
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import requests
from bs4 import BeautifulSoup
import bs4 as bs
class WebPage(QtWebEngineWidgets.QWebEnginePage):
    """Render a sequence of URLs one after another in a single page object.

    Each finished load hands the page HTML to ``processCurrentPage``, which
    prints the scraped lines and then requests the next URL. When the URLs
    are exhausted the application is asked to quit.
    """

    def __init__(self):
        super(WebPage, self).__init__()
        self.loadFinished.connect(self.handleLoadFinished)

    def start(self, urls):
        """Begin loading *urls* in order.

        If *urls* is empty nothing is loaded and no quit is requested.
        """
        self._urls = iter(urls)
        # fetchNext is a property: merely accessing it starts the next load.
        self.fetchNext

    @property
    def fetchNext(self):
        """Start loading the next URL.

        Returns True if a load was started, False if no URLs are left.
        """
        try:
            url = next(self._urls)
        except StopIteration:
            return False
        else:
            self.load(QtCore.QUrl(url))
            return True

    def processCurrentPage(self, html):
        """Print the 'tablo_dual_board' text lines of *html*, then move on."""
        url = self.url().toString()
        try:
            soup = bs.BeautifulSoup(html, 'html.parser')
            veri = soup.find('div', class_='tablo_dual_board')
            if veri is None:
                # find() returns None when nothing matches; report it
                # rather than failing on ``None.text``.
                print("no 'tablo_dual_board' div found at {}".format(url),
                      file=sys.stderr)
            else:
                print(veri.text.splitlines())
        finally:
            # Always advance the queue, even if parsing failed; otherwise
            # the remaining URLs are never loaded and quit is never called.
            if not self.fetchNext:
                QtWidgets.qApp.quit()

    def handleLoadFinished(self):
        """Request the HTML of the page that has just finished loading."""
        self.toHtml(self.processCurrentPage)

    def javaScriptConsoleMessage(self, *args):
        # disable javascript error output
        pass
if __name__ == '__main__':
    # Collect the match links from the live football overview page.
    onexurl = "https://1xbahis1.com/en/live/Football/"
    # A timeout keeps the script from hanging on an unresponsive server.
    r = requests.get(onexurl, timeout=30)
    soup = BeautifulSoup(r.content, "html.parser")
    links = soup.find_all("a", {"class": "c-events__name"})
    # Anchors without an href are skipped: .get("href") returns None for
    # them, and concatenating None to the base URL raises TypeError.
    urls = [
        "https://1xbahis1.com/en/" + href
        for href in (matchlink.get("href") for matchlink in links)
        if href
    ]
    # only try 3 urls for testing
    urls = urls[:3]
    if not urls:
        # With no URLs, WebPage.start() loads nothing and quit is never
        # requested, so app.exec_() would block forever.
        sys.exit("no match links found on " + onexurl)
    app = QtWidgets.QApplication(sys.argv)
    webpage = WebPage()
    webpage.start(urls)
    sys.exit(app.exec_())