0

Here is the code. I am using the JSoup library to access the HTML elements and get the locations of the images. Then, I pass those image locations into an image downloader that downloads the images to the desktop.

import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;

import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches a web page with jsoup, selects every {@code <img>} element whose
 * {@code src} attribute ends in {@code .png}, and downloads each image to a
 * numbered file under a fixed output prefix.
 */
public class JSoupExtractor {
    /** Running counter used to number the downloaded image files; starts at 1. */
    public static int i = 1;

    /**
     * Entry point: fetches the page, then downloads each matching PNG image.
     *
     * <p>If the page cannot be fetched, the error is reported and the program
     * stops; there is no document to select images from. A failure to download
     * one image is reported and the loop continues with the next image.
     *
     * @param args command-line arguments (not used)
     */
    public static void main (String[] args) {
        String destinationFile = "C:\\Users\\MYNAME\\Desktop\\Output\\image";
        Document doc;
        try {
            doc = Jsoup.connect("http://google.com/").get();
        } catch (IOException e1) {
            System.out.println("OOF!");
            e1.printStackTrace();
            // No document was fetched; going on would dereference null below.
            return;
        }
        // Only PNG images are downloaded; "abs:src" resolves the src against the page URL.
        Elements pngs = doc.select("img[src$=.png]");
        for (Element e : pngs) {
            String imageUrl = e.attr("abs:src");
            System.out.println("hi");
            System.out.println(imageUrl);
            try {
                saveImage(imageUrl, destinationFile + i + ".png");
            } catch (Exception e1) {
                // Best effort: one failed download must not abort the remaining ones.
                System.out.println("DANG!");
                e1.printStackTrace();
            }
            // The counter advances even on failure, so numbering follows page order.
            i++;
        }
    }

    /**
     * Downloads the resource at {@code imageUrl} and writes its bytes to
     * {@code destinationFile}, sending a fixed {@code User-Agent} header.
     *
     * <p>Bytes are streamed straight from the connection to the file in 4 KB
     * chunks. Both streams are closed when the method exits, whether it
     * completes normally or throws.
     *
     * @param imageUrl        absolute URL of the image to download
     * @param destinationFile path of the file to create or overwrite
     * @throws IOException if the URL is malformed, the connection cannot be
     *                     opened or read, or the file cannot be written
     */
    public static void saveImage(String imageUrl, String destinationFile) throws IOException {
        URL url = new URL(imageUrl);
        URLConnection connection = url.openConnection();
        connection.setRequestProperty("User-Agent", "Chrome/56.0.2924.87");
        // try-with-resources closes both streams even if a read or write fails.
        try (InputStream is = connection.getInputStream();
             OutputStream os = new FileOutputStream(destinationFile)) {
            byte[] b = new byte[4096];
            int length;
            while ((length = is.read(b)) >= 0) {
                os.write(b, 0, length);
            }
        }
    }
}

When I run the above code, this is my error stack trace.

OOF!
javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
    at sun.security.ssl.Alert.createSSLException(Unknown Source)
    at sun.security.ssl.TransportContext.fatal(Unknown Source)
    at sun.security.ssl.TransportContext.fatal(Unknown Source)
    at sun.security.ssl.TransportContext.fatal(Unknown Source)
    at sun.security.ssl.CertificateMessage$T12CertificateConsumer.checkServerCerts(Unknown Source)
    at sun.security.ssl.CertificateMessage$T12CertificateConsumer.onCertificate(Unknown Source)
    at sun.security.ssl.CertificateMessage$T12CertificateConsumer.consume(Unknown Source)
    at sun.security.ssl.SSLHandshake.consume(Unknown Source)
    at sun.security.ssl.HandshakeContext.dispatch(Unknown Source)
    at sun.security.ssl.HandshakeContext.dispatch(Unknown Source)
    at sun.security.ssl.TransportContext.dispatch(Unknown Source)
    at sun.security.ssl.SSLTransport.decode(Unknown Source)
    at sun.security.ssl.SSLSocketImpl.decode(Unknown Source)
    at sun.security.ssl.SSLSocketImpl.readHandshakeRecord(Unknown Source)
    at sun.security.ssl.SSLSocketImpl.startHandshake(Unknown Source)
    at sun.security.ssl.SSLSocketImpl.startHandshake(Unknown Source)
    at sun.net.www.protocol.https.HttpsClient.afterConnect(Unknown Source)
    at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(Unknown Source)
    at sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(Unknown Source)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:859)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
    at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
    at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
    at JSoupExtractor.main(JSoupExtractor.java:22)
Caused by: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
    at sun.security.validator.PKIXValidator.doBuild(Unknown Source)
    at sun.security.validator.PKIXValidator.engineValidate(Unknown Source)
    at sun.security.validator.Validator.validate(Unknown Source)
    at sun.security.ssl.X509TrustManagerImpl.validate(Unknown Source)
    at sun.security.ssl.X509TrustManagerImpl.checkTrusted(Unknown Source)
    at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(Unknown Source)
    ... 22 more
Caused by: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
    at sun.security.provider.certpath.SunCertPathBuilder.build(Unknown Source)
    at sun.security.provider.certpath.SunCertPathBuilder.engineBuild(Unknown Source)
    at java.security.cert.CertPathBuilder.build(Unknown Source)
    ... 28 more
Exception in thread "main" java.lang.NullPointerException
    at JSoupExtractor.main(JSoupExtractor.java:28)

To try to fix this error, I changed the address to use http instead of https, because https causes a lot of SSL errors for some reason. Also, I set a user agent on the connection to access the site, since I know that many sites want real people visiting instead of bots. I am using a school computer, but Google is unblocked here. I believe I have exhausted the most common sources of error.

Some Guy
  • 143
  • 9
  • @ScaryWombat Google is the only problem website for me, other sites that use https are easily converted to http (by me) and show no problem. – Some Guy Feb 09 '22 at 02:13
  • 1
    Have you tried this code on something other than a school computer? There is a chance the school computer sits behind a forward proxy that responds with its own SSL certificate, which your Java doesn't trust. Check the https://www.google.com certificate in Chrome and see if it's signed by GTS Root CA .. Just a thought – Yan Feb 09 '22 at 02:14
  • @Yan Google is unblocked on my school computer – Some Guy Feb 09 '22 at 02:16
  • 1
    It's unblocked, but the school might have a forward proxy that inspects traffic like a man in the middle. If that's the case, when your app makes a request to Google it connects to the proxy, then the proxy connects to Google, gets the data, and responds to your app presenting the proxy's SSL certificate – Yan Feb 09 '22 at 02:17
  • Ok I have used the second answer from this link https://stackoverflow.com/questions/6659360/how-to-solve-javax-net-ssl-sslhandshakeexception-error to turn off my security measures. This is okay since I am using a safe site like google, correct? – Some Guy Feb 09 '22 at 02:41
  • That's fine when you write code to learn but not good when deploying your application in "production". – Yan Feb 09 '22 at 14:49

0 Answers0