You can use a combination of JSoup and HtmlUnit to get the page contents after JavaScript scripts are done loading.
pom.xml
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>3.35</version>
</dependency>
Simple Example From file https://riptutorial.com/jsoup/example/16274/parsing-javascript-generated-page-with-jsoup-and-htmunit
// load page using HTML Unit and fire scripts
WebClient webClient2 = new WebClient();
HtmlPage myPage = webClient2.getPage(new File("page.html").toURI().toURL());
// convert page to generated HTML and convert to document
Document doc = Jsoup.parse(myPage.asXml());
// iterate row and col
for (Element row : doc.select("table#data > tbody > tr"))
for (Element col : row.select("td"))
// print results
System.out.println(col.ownText());
// clean up resources
webClient2.close();
A Complex Example: Load login, get Session and CSRF, then post and wait for home page to finish loading (15 seconds)
import java.io.IOException;
import java.net.HttpCookie;
import java.net.MalformedURLException;
import java.net.URL;
import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.HttpMethod;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
//JSoup load Login Page and get Session Details
Connection.Response res = Jsoup.connect("https://loginpage").method(Method.GET).execute();
String sessionId = res.cookie("findSESSION");
String csrf = res.cookie("findCSRF");
HttpCookie cookie = new HttpCookie("findCSRF", csrf);
cookie.setDomain("domain.url");
cookie.setPath("/path");
WebClient webClient = new WebClient();
webClient.addCookie(cookie.toString(),
new URL("https://url"),
"https://referrer");
// Add other cookies/ Session ...
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setCssEnabled(false);
webClient.getOptions().setUseInsecureSSL(true);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.getCookieManager().setCookiesEnabled(true);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
// Wait time
webClient.waitForBackgroundJavaScript(15000);
webClient.getOptions().setThrowExceptionOnScriptError(false);
URL url = new URL("https://login.path");
WebRequest requestSettings = new WebRequest(url, HttpMethod.POST);
requestSettings.setRequestBody("user=234&pass=sdsdc&CSRFToken="+csrf);
HtmlPage page = webClient.getPage(requestSettings);
// Wait
synchronized (page) {
try {
page.wait(15000);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
// Parse logged in page as needed
Document doc = Jsoup.parse(page.asXml());