I wrote a piece of code to scrape a web which actually works using it with one url but as soon as I put more than 2 ursl in the .txt tells me 'Segmentation Fault'. I have no idea where the problem is. Any help will be appreciated.
import sys
import time
import gc
from bs4 import BeautifulSoup
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Render(QWebPage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.loadFinished.connect(self._loadFinished)
self.mainFrame().load(QUrl(url))
self.app.exec_()
def _loadFinished(self, result):
self.frame = self.mainFrame()
#self.deleteLater()
self.app.quit()
with open('/blah/blah/blah/blah/blah.txt') as f:
urls = f.read().splitlines()
for i in urls:
r = Render(i)
soup = BeautifulSoup(unicode(r.frame.toHtml()))
summary = soup.find('div',{'style' : 'padding-top:10px;'})
tables = summary.find('tbody')
count = 0
print
for row in tables.findAll('tr'):
for cell in row.findAll('td'):
data = cell.getText()
if (count < 15):
data = data + ';'
print data,
count += 1
if (count==16):
print data
count = 0
Well, thats the code. I get 2 iterations of the with loop before it tells me Segmentation fault... :( In other words, I get to scrape 2 url´s out of 6 that the txt has.
Thanks in advance for the help