I'm using the boilerplate PyQt4 code below to capture all of the HTML that JavaScript generates on a page:
import sys
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
class Render(QWebPage):
    """Load a URL in a QWebPage and block until the load has finished.

    Constructing an instance runs the Qt event loop until the page emits
    ``loadFinished``. Afterwards ``frame`` holds the page's main frame as it
    stood when that signal fired, and ``load_ok`` holds the success flag the
    signal delivered.
    """

    def __init__(self, url):
        """Start loading ``url`` and run the event loop until it finishes.

        Args:
            url: Address to load; it is passed straight to ``QUrl``.
        """
        # Qt allows only one QApplication per process. Reuse the existing
        # one so that rendering a second page in the same interpreter does
        # not try to construct another.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        QWebPage.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _loadFinished() calls quit().
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the outcome of the load and stop the event loop.

        Args:
            result: Success flag delivered by the ``loadFinished`` signal.
        """
        # Keep the flag instead of discarding it so callers can tell a
        # failed load from a successful one.
        self.load_ok = result
        self.frame = self.mainFrame()
        self.app.quit()
def getHtml(str_url):
    """Return the main frame's HTML for the page at ``str_url``.

    Builds a ``Render`` for the URL and serialises the frame it captured
    with ``toHtml()``.
    """
    return Render(str_url).frame.toHtml()
I then created a test page to see if it works:
<!-- Minimal test page: the #test div is empty in the markup and is filled in by script. -->
<html>
<head>
<!-- jQuery 1.9.1 from the Google CDN; the protocol-relative URL uses the scheme the page itself was loaded with. -->
<script src="//ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<script type="text/javascript">
// Once the DOM is ready, set the text of the #test div.
$(document).ready(function() {
$('#test').text('This is a test!')
});
</script>
</head>
<body>
<!-- Empty placeholder that the ready handler above populates. -->
<div id="test"></div>
</body>
</html>
so running
getHtml('http://www.mytestpage.com')
I'd expect the returned HTML to contain the 'This is a test!' text inside the div. However, the HTML that comes back does not include that text; the div is still empty.
What am I doing wrong? Is the code not waiting for the page to fully load? Or am I misunderstanding what this approach is meant for?