When I try to use the jsoup example in my Android app on a page like this, no images are found. Is that because the content is loaded through JavaScript?
My code currently looks like this:
// Fetch the page and select every element that carries a src attribute.
Document doc = Jsoup.connect(url).get();
Elements media = doc.select("[src]");
print("\nMedia: (%d)", media.size());

// Pass 1: keep <img> elements whose URL contains neither ".png" nor ".gif"
// and whose declared width is either unknown (0) or at least minAllowedWidth.
List<Link> tmpLinks = new ArrayList<>();
int i = 0;
for (Element src : media) {
    if (!src.tagName().equals("img")) {
        continue;
    }
    // Read the URL once instead of re-querying the attribute on every use.
    String absSrc = src.attr("abs:src");
    if (absSrc.contains(".png") || absSrc.contains(".gif")) {
        continue;
    }

    // width/height are free-form HTML attribute text ("100%", "120px", " 80 "),
    // so parseInt can throw. A value that does not parse is treated exactly like
    // a missing attribute (0) instead of aborting the whole scan.
    int width = 0;
    String widthString = src.attr("width").trim();
    if (!widthString.isEmpty()) {
        try {
            width = Integer.parseInt(widthString);
        } catch (NumberFormatException ignored) {
            // Not a plain integer: leave width at 0 (unknown).
        }
    }

    int height = 0;
    String heightString = src.attr("height").trim();
    if (!heightString.isEmpty()) {
        try {
            height = Integer.parseInt(heightString);
        } catch (NumberFormatException ignored) {
            // Not a plain integer: leave height at 0 (unknown).
        }
    }

    if ((width == 0 || width >= minAllowedWidth) && !absSrc.isEmpty()) {
        tmpLinks.add(new Link(i, absSrc, width, height));
    }

    // The index advances for every candidate image, whether or not it was added.
    i++;
    print(" * %s: <%s> %sx%s (%s)",
            src.tagName(), absSrc, src.attr("width"), src.attr("height"),
            trim(src.attr("alt"), 20));
}

// Pass 2: drop links whose URL was already seen, keeping the first occurrence.
List<Link> noDuplicates = new ArrayList<>();
Set<String> seenUrls = new HashSet<>();
for (Link link : tmpLinks) {
    if (seenUrls.add(link.getUrl())) {
        noDuplicates.add(link);
    }
}

// Pass 3: ask each URL for its content length and keep only links that
// report at least minAllowedFileSize.
List<Link> finalLinks = new ArrayList<>();
for (Link link : noDuplicates) {
    URL testUrl = new URL(link.getUrl());
    URLConnection urlConnection = testUrl.openConnection();
    try {
        urlConnection.connect();
        // getContentLength() returns -1 when the length is not known.
        int fileSize = urlConnection.getContentLength();
        System.out.println("Fetching size: " + link.getUrl() + " " + fileSize);
        if (fileSize >= minAllowedFileSize) {
            link.setSize(fileSize);
            finalLinks.add(link);
        }
    } finally {
        // Release the connection; the original never closed it. The fully
        // qualified name avoids needing an extra import.
        if (urlConnection instanceof java.net.HttpURLConnection) {
            ((java.net.HttpURLConnection) urlConnection).disconnect();
        }
    }
}

Collections.sort(finalLinks, new LinkSizeComparator());
My second question is: can I use jsoup to analyze subpages (or image links) on a site like this?