I am working with QWebEngineView and have found that clicking a link to a PDF file does not open the file. I have found that QWebEngineView has no way to display PDF files on its own. After some research I can now download PDF files and display them separately; however, I need to get the URL of the PDF file from QWebEngineView to know which one to download. The problem is that the .url()
function only returns the URL of the current web page and does not seem to be affected by clicking the link to the PDF file, and I can't find any other way to get that link. Any ideas on how to get the link to the PDF file? Any help is appreciated.
Asked
Active
Viewed 475 times
0

eyllanesc
- 235,170
- 19
- 170
- 241

blunty6363
- 29
- 6
1 Answers
1
You can use javascript to get all the links and then filter by the extension:
import sys
from PyQt5.QtCore import QCoreApplication, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView
def main():
    """Load the sample page and print every same-host link ending in ".pdf".

    Once the page has loaded successfully, a JavaScript snippet collects the
    ``href`` of each entry in ``document.links`` whose hostname matches the
    page's own hostname.  The resulting list is passed to ``callback``,
    which prints the entries ending in ".pdf" and then quits the application.
    """
    app = QApplication(sys.argv)
    url = QUrl("https://www.princexml.com/samples/")
    view = QWebEngineView()

    def callback(links):
        """Print the PDF links returned by the script, then quit."""
        # NOTE(review): the result may presumably be None when the script
        # yields no usable value; fall back to an empty list so the loop
        # cannot raise TypeError and the application still quits.
        for link in links or []:
            if link.endswith(".pdf"):
                print(link)
        QCoreApplication.quit()

    def handle_load_finished(ok):
        """Run the link-collecting script only if the page loaded."""
        if ok:
            view.page().runJavaScript(
                """
                (function() {
                    // https://stackoverflow.com/a/3824292/6622587
                    var urls = [];
                    for(var i = document.links.length; i --> 0;)
                        if(document.links[i].hostname === location.hostname)
                            urls.push(document.links[i].href);
                    return urls;
                })();""",
                callback,
            )

    view.loadFinished.connect(handle_load_finished)
    view.load(url)
    view.resize(640, 480)
    view.show()
    sys.exit(app.exec_())


if __name__ == "__main__":
    main()
Output:
http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
http://www.princexml.com/howcome/2016/samples/magic6/magic.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/flyer/flyer.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
https://www.princexml.com/samples/catalog/PrinceCatalogue.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples//malthus/essay.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/magic8/index.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf
http://www.princexml.com/howcome/2016/samples/invoice/index.pdf
https://www.princexml.com/samples/invoice/invoicesample.pdf
Update:
If you want to download the PDF then it is not necessary to implement the above since QWebEngineView does allow downloads.
import sys
from PyQt5.QtCore import QCoreApplication, QFileInfo, QUrl
from PyQt5.QtWidgets import QApplication, QFileDialog
from PyQt5.QtWebEngineWidgets import QWebEngineView
def handle_download_erequested(download):
    """Ask the user where to save a requested download and accept it.

    Progress, state changes and completion are echoed with ``print``.  The
    save dialog is pre-filled with the path component of the download URL
    and filtered by that path's file suffix.  If the dialog returns an empty
    path, the download is not accepted by this handler.

    Args:
        download: The download item delivered by the ``downloadRequested``
            signal.
    """
    download.downloadProgress.connect(print)
    download.stateChanged.connect(print)
    download.finished.connect(lambda: print("download finished"))

    old_path = download.url().path()  # download.path()
    suffix = QFileInfo(old_path).suffix()
    # A URL path without an extension would otherwise yield the malformed
    # filter "*."; fall back to matching every file in that case.
    name_filter = "*." + suffix if suffix else "*"

    path, _ = QFileDialog.getSaveFileName(None, "Save File", old_path, name_filter)
    if path:
        download.setPath(path)
        download.accept()
def main():
    """Show the sample page in a web view with downloads routed to the handler.

    Every download requested through the view's profile is passed to
    ``handle_download_erequested``.
    """
    app = QApplication(sys.argv)

    view = QWebEngineView()
    profile = view.page().profile()
    profile.downloadRequested.connect(handle_download_erequested)

    start_url = QUrl("https://www.princexml.com/samples/")
    view.load(start_url)
    view.resize(640, 480)
    view.show()

    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
Also, QWebEngineView has a PDF viewer:
import sys
from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets
def main():
    """Open a sample PDF directly in a QWebEngineView.

    Prints the PyQt5 and Qt versions, enables the ``PluginsEnabled`` setting
    on the view, then loads the PDF URL and runs the event loop.
    """
    print(
        f"PyQt5 version: {QtCore.PYQT_VERSION_STR}, Qt version: {QtCore.QT_VERSION_STR}"
    )

    app = QtWidgets.QApplication(sys.argv)
    view = QtWebEngineWidgets.QWebEngineView()

    # Switch on the PluginsEnabled attribute before loading the document.
    plugins_attribute = QtWebEngineWidgets.QWebEngineSettings.PluginsEnabled
    view.settings().setAttribute(plugins_attribute, True)

    pdf_url = QtCore.QUrl("https://www.princexml.com/samples/invoice/invoicesample.pdf")
    view.load(pdf_url)
    view.resize(640, 480)
    view.show()

    exit_code = app.exec_()
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

eyllanesc
- 235,170
- 19
- 170
- 241
-
Hi, thank you very much for your answer, is there a way I can make this specific to a link I click, I want to be able to click a link which then downloads that pdf, I am unsure how to get the pdf link I have clicked. – blunty6363 Aug 11 '21 at 20:13
-
2@blunty6363 You seem to have an XY problem, check my update. – eyllanesc Aug 11 '21 at 20:20
-
Thank you so much for your help. I have a very strange problem now however. I have added some code to my questions which is responsible for creating my QWebEngineView. My issue is, if I leave the code like it is (with the one line commented out) then I am able to open the pdf files with the viewer, however when trying to open youtube the program crashes, but if I take the comment out and put the line into the program, youtube no longer crashes but I can't open the pdf file any more. Any idea why this happens and any solutions? – blunty6363 Aug 11 '21 at 20:55
-
1@blunty6363 You should not change your post, you do not ask about a pdfviewer (it was just a demo from me) but how to get the links. If you have another problem then create a new post. If you use the code from my pdfviewer do you have the same problem? Please read [ask] and review the [tour] – eyllanesc Aug 11 '21 at 20:59