import java.io.IOException;
import java.util.ArrayList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
/**
 * Fetches the Google Play "top selling free" action-games page and prints the
 * cleaned-up title of every app link found in the statically served HTML.
 *
 * <p>Only the markup returned by the initial HTTP GET is inspected; entries
 * that the page adds later via scripting are not present in the parsed
 * document and are therefore not listed.
 */
public class listGrabber {

    /** Listing page whose app titles are extracted. */
    private static final String LISTING_URL =
            "https://play.google.com/store/apps/category/GAME_ACTION/collection/topselling_free";

    /** CSS query matching the anchors whose {@code title} attribute holds the app name. */
    private static final String TITLE_LINK_QUERY = "a[class^=title]";

    /**
     * Downloads the listing page, extracts each app title and prints it to
     * standard output, one title per line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            Document doc = Jsoup.connect(LISTING_URL).get();

            // Run the selector once instead of re-querying the whole document
            // for every index.
            Elements titleLinks = doc.select(TITLE_LINK_QUERY);
            ArrayList<String> list = new ArrayList<>();

            // Bounded iteration: the loop ends when the matches are exhausted
            // rather than relying on an out-of-range access to stop it.
            for (int i = 0; i < titleLinks.size(); i++) {
                // Read the plain attribute. The "abs:" prefix is meant for URL
                // attributes and would try to resolve the title as a link.
                String title = cleanTitle(titleLinks.get(i).attr("title"));
                if (title.isEmpty()) {
                    continue; // anchor without a usable title
                }
                list.add(title);
                System.out.println(title);
            }
        } catch (IOException e) {
            // Report the failure instead of silently producing no output.
            System.err.println("Failed to fetch " + LISTING_URL + ": " + e);
        }
    }

    /** Strips trademark symbols and any parenthesised suffix from a raw app title. */
    private static String cleanTitle(String rawTitle) {
        String cleaned = rawTitle.replaceAll("®|™", "");
        return cleaned.replaceAll("[(](.*)[)]", "");
    }
}
As you can see above, I'm trying to grab a list of words from https://play.google.com/store/apps/category/GAME_ACTION/collection/topselling_free
The Google Play Store page loads more elements every time you scroll to the bottom of the page.
My program grabs the first 40 or so elements that show up, but since Jsoup doesn't load the rest of the page, which is loaded dynamically, I can't grab any of the elements beyond those first 40.
Furthermore, if you scroll down the page to game #300, a "Show More" button appears; I'd also like to parse the elements that come after the "Show More" button.
Is there any way for Jsoup to parse all the elements that would dynamically load on the page?