I'm trying to get some datas from a javascript webpage. My code is generating multiple links and parsing them one by one. Parsing outputs are lists. I have written this code with help from here. But it produces the lists inside a class. I want to insert list items into an sqlite table, and because of this I want to make the local list items global. I've tried to create a global list, put it into the class, and then append to it and return it. I've tried to directly insert them into the database from the processCurrentPage
method and tried to create a list under the class and reach it by Webpage.list. But none of these methods worked. One of my attempts is here, but not the best one - it's only an example. I've tried many alternatives like this. Can you suggest a good way to handle it please?
P.S: I am new at Python, but researching it for whole two days, and read all class documentation, but couldn't find a way.
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
import requests
from bs4 import BeautifulSoup
import bs4 as bs
class WebPage(QtWebEngineWidgets.QWebEnginePage):
alldatas=[]
def __init__(self):
super(WebPage, self).__init__()
self.loadFinished.connect(self.handleLoadFinished)
def start(self, urls):
self._urls = iter(urls)
self.fetchNext
@property
def fetchNext(self):
try:
url = next(self._urls)
except StopIteration:
return False
else:
self.load(QtCore.QUrl(url))
return True
def processCurrentPage(self, html):
url = self.url().toString()
# do stuff with html...
soup = bs.BeautifulSoup(html, 'html.parser')
data = soup.find('div', class_='tablo_dual_board')
data1 = data.text
data2 = data1.splitlines()
self.alldatas+=data2
if not self.fetchNext:
QtWidgets.qApp.quit()
def handleLoadFinished(self):
self.toHtml(self.processCurrentPage)
def javaScriptConsoleMessage(self, QWebEnginePage_JavaScriptConsoleMessageLevel, p_str, p_int, p_str_1):
# disable javascript error output
pass
if __name__ == '__main__':
# generate some test urls
onexurl = "https://1xbahis1.com/en/live/Football/"
r = requests.get(onexurl)
soup = BeautifulSoup(r.content, "html.parser")
income = soup.find_all("ul", {"id":"games_content"})
links = soup.find_all("a", {"class": "c-events__name"})
urls = []
for matchlink in links:
urls.append("https://1xbahis1.com/en/"+(matchlink.get("href")))
app = QtWidgets.QApplication(sys.argv)
webpage = WebPage()
webpage.start(urls)
print(webpage.alldatas)
sys.exit(app.exec_())