Here is the code. I am using the JSoup library to access the HTML elements and get the locations of the images. Then, I pass those image locations into an image downloader that downloads the images to the desktop.
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches a web page with jsoup, selects the {@code <img>} elements whose {@code src}
 * attribute ends in {@code .png} or {@code .jpg}, and saves each image to a numbered file.
 */
public class JSoupExtractor {

    /** Running number appended to the destination file name; advanced once per image attempted. */
    public static int i = 1;

    /**
     * Downloads the PNG and JPG images referenced by a page.
     *
     * @param args optional; when a first argument is supplied it is used as the page URL,
     *             otherwise {@code http://google.com/} is fetched
     */
    public static void main(String[] args) {
        String pageUrl = (args != null && args.length > 0) ? args[0] : "http://google.com/";
        String destinationFile = "C:\\Users\\MYNAME\\Desktop\\Output\\image";

        Document doc;
        try {
            doc = Jsoup.connect(pageUrl).get();
        } catch (IOException e1) {
            System.out.println("OOF!");
            e1.printStackTrace();
            // Without a document there is nothing to select from; continuing would
            // dereference null and hide the real failure behind a NullPointerException.
            return;
        }

        downloadAll(doc.select("img[src$=.png]"), destinationFile, ".png");
        downloadAll(doc.select("img[src$=.jpg]"), destinationFile, ".jpg");
    }

    /**
     * Saves every image in {@code images} to {@code destinationPrefix + i + extension}.
     * A failure on one image is reported and does not stop the remaining downloads.
     */
    private static void downloadAll(Elements images, String destinationPrefix, String extension) {
        for (Element image : images) {
            // "abs:src" asks jsoup for the src attribute resolved against the page's base URI.
            String imageUrl = image.attr("abs:src");
            System.out.println(imageUrl);
            try {
                saveImage(imageUrl, destinationPrefix + i + extension);
            } catch (IOException e1) {
                System.out.println("DANG!");
                e1.printStackTrace();
            }
            // Advance even after a failure so each image keeps its own number.
            i++;
        }
    }

    /**
     * Downloads the resource at {@code imageUrl} and writes its bytes to {@code destinationFile}.
     *
     * @param imageUrl        absolute URL of the image to download
     * @param destinationFile path of the file to create or overwrite
     * @throws IOException if the URL is malformed, the connection fails, or the file cannot
     *                     be written
     */
    public static void saveImage(String imageUrl, String destinationFile) throws IOException {
        URL url = new URL(imageUrl);
        URLConnection connection = url.openConnection();
        connection.setRequestProperty("User-Agent", "Chrome/56.0.2924.87");

        // try-with-resources closes both streams even when a read or write throws.
        try (InputStream is = connection.getInputStream();
             OutputStream os = new FileOutputStream(destinationFile)) {
            byte[] buffer = new byte[4096];
            int length;
            // Copy straight to the file instead of buffering the whole image in memory.
            while ((length = is.read(buffer)) >= 0) {
                os.write(buffer, 0, length);
            }
        }
    }
}
When I run the above code, I get the following output and stack trace.
OOF!
javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.ssl.Alert.createSSLException(Unknown Source)
at sun.security.ssl.TransportContext.fatal(Unknown Source)
at sun.security.ssl.TransportContext.fatal(Unknown Source)
at sun.security.ssl.TransportContext.fatal(Unknown Source)
at sun.security.ssl.CertificateMessage$T12CertificateConsumer.checkServerCerts(Unknown Source)
at sun.security.ssl.CertificateMessage$T12CertificateConsumer.onCertificate(Unknown Source)
at sun.security.ssl.CertificateMessage$T12CertificateConsumer.consume(Unknown Source)
at sun.security.ssl.SSLHandshake.consume(Unknown Source)
at sun.security.ssl.HandshakeContext.dispatch(Unknown Source)
at sun.security.ssl.HandshakeContext.dispatch(Unknown Source)
at sun.security.ssl.TransportContext.dispatch(Unknown Source)
at sun.security.ssl.SSLTransport.decode(Unknown Source)
at sun.security.ssl.SSLSocketImpl.decode(Unknown Source)
at sun.security.ssl.SSLSocketImpl.readHandshakeRecord(Unknown Source)
at sun.security.ssl.SSLSocketImpl.startHandshake(Unknown Source)
at sun.security.ssl.SSLSocketImpl.startHandshake(Unknown Source)
at sun.net.www.protocol.https.HttpsClient.afterConnect(Unknown Source)
at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(Unknown Source)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.connect(Unknown Source)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:859)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
at JSoupExtractor.main(JSoupExtractor.java:22)
Caused by: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.validator.PKIXValidator.doBuild(Unknown Source)
at sun.security.validator.PKIXValidator.engineValidate(Unknown Source)
at sun.security.validator.Validator.validate(Unknown Source)
at sun.security.ssl.X509TrustManagerImpl.validate(Unknown Source)
at sun.security.ssl.X509TrustManagerImpl.checkTrusted(Unknown Source)
at sun.security.ssl.X509TrustManagerImpl.checkServerTrusted(Unknown Source)
... 22 more
Caused by: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target
at sun.security.provider.certpath.SunCertPathBuilder.build(Unknown Source)
at sun.security.provider.certpath.SunCertPathBuilder.engineBuild(Unknown Source)
at java.security.cert.CertPathBuilder.build(Unknown Source)
... 28 more
Exception in thread "main" java.lang.NullPointerException
at JSoupExtractor.main(JSoupExtractor.java:28)
To try to fix this error, I changed the address to use http instead of https, because https seems to cause a lot of SSL errors for some reason. I also set a User-Agent header on the connection used to download the images, since I know that many sites want real people visiting instead of bots. I am using a school computer, but Google is not blocked here. I believe I have ruled out the most common sources of error.