so a quick overview of what I'm doing I am using Android Webview to Render JavaScript and then reading the HTML from the javascript to parse it.
I am currently having trouble with retrieving the HTML from a website called Sport Chek.
Here is the code for my SportChekSearch class:
public class SportChekSearch extends SearchQuery{
public Elements finalDoc;
private ArrayList<Item> processed;
private final Handler uiHandler = new Handler();
public int status = 0;
//This basically is just so that the class knows which Activity we're working with
private Context c;
protected class JSHtmlInterface {
@android.webkit.JavascriptInterface
public void showHTML(String html) {
final String htmlContent = html;
uiHandler.post(
new Runnable() {
@Override
public void run() {
Document doc = Jsoup.parse(htmlContent);
}
}
);
}
}
/**
* Constructor method
* @param context The context taken from the webview (So that the asynctask can show progress)
*/
public SportChekSearch(Context context, String query) {
final Context c = context;
try {
final WebView browser = new WebView(c);
browser.setVisibility(View.INVISIBLE);
browser.setLayerType(View.LAYER_TYPE_NONE, null);
browser.getSettings().setJavaScriptEnabled(true);
browser.getSettings().setBlockNetworkImage(true);
browser.getSettings().setDomStorageEnabled(true);
browser.getSettings().setCacheMode(WebSettings.LOAD_NO_CACHE);
browser.getSettings().setLoadsImagesAutomatically(false);
browser.getSettings().setGeolocationEnabled(false);
browser.getSettings().setSupportZoom(false);
browser.getSettings().setUserAgentString("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36");
browser.addJavascriptInterface(new JSHtmlInterface(), "JSBridge");
browser.setWebViewClient(
new WebViewClient() {
@Override
public void onPageStarted(WebView view, String url, Bitmap favicon) {
super.onPageStarted(view, url, favicon);
}
@Override
public void onPageFinished(WebView view, String url) {
browser.loadUrl("javascript:window.JSBridge.showHTML('<html>'+document.getElementsByTagName('html')[0].innerHTML+'</html>');");
}
}
);
browser.loadUrl("https://www.sportchek.ca/search.html#q=" + query.replaceAll(" ", "+") + "&lastVisibleProductNumber=3");
browser.loadUrl(browser.getUrl());
final String link = browser.getUrl();
new fetcher(c).execute(link);
}
catch(Exception e){
e.printStackTrace();
}
//Get the link from the WebView, and save it in a final string so it can be accessed from worker thread
}
/**
* This subclass is a worker thread meaning it does work in the background while the user interface is doing something else
* This is done to prevent "lag".
* To call this class you must write fetcher(Context c).execute(The link you want to connect to)
*
*/
class fetcher extends AsyncTask<String, Void, Elements> {
Context mContext;
ProgressDialog pdialog;
public fetcher(Context context) {
mContext = context;
}
@Override
protected void onPreExecute() {
super.onPreExecute();
pdialog = new ProgressDialog(mContext);
pdialog.setTitle(R.string.finding_results);
pdialog.setCancelable(false);
pdialog.show();
}
//This return elements because the postExecute() method needs an Elements object to parse its results
@Override
protected Elements doInBackground(String... strings) {
//You can pass in multiple strings, so this line just says to use the first string
String link = strings[0];
//For Debug Purposes, Do NOT Remove - **Important**
System.out.println("Connecting to: " + link);
try {
doc = Jsoup.connect(link)
.ignoreContentType(true)
.userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36")
.timeout(10000)
.get();
finalDoc = doc.select("body section.product-grid-wrapper");
System.out.println(finalDoc.toString());
} catch (IOException e) {
e.printStackTrace();
}
return finalDoc;
}
@Override
protected void onPostExecute(Elements result) {
//This line clears the list of info in the Search activity
//I should probably be using a getter method but adapter is a static variable so it shouldn't matter
//parse seperates document into elements
//crunch results formats those elements into item objects
//I am saving the result of this to an ArrayList<Item> called "processed"
processed = crunchResults(result);
//For debug purposes, do NOT remove - **Important**
System.out.println(processed.size() + " results have been crunched by Sport Chek.");
//Adds all of the processed results to the list of info in Search activity
ClothingSearch.adapter.addAll(processed);
//For debug purposes, do NOt remove - **Important
System.out.println("Adapter has been notified by Sport Chek.");
//Closes the progress dialog called pdialog assigned to the AsyncTask
pdialog.dismiss();
ClothingSearch.adapter.notifyDataSetChanged();
SearchQueueHandler.makeRequest(mContext, processed, SearchQueueHandler.CLOTHING_SEARCH);
}
}
public ArrayList<Item> crunchResults(Elements e){
ArrayList<Item> results = new ArrayList<Item>();
try {
for (int i = 0; i < e.size(); i++) {
Element ele = e.get(i);
String link = "https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href");
System.out.println("https://www.sportchek.ca" + ele.select(" a.product-grid__link").attr("href"));
String title = ele.select(" span.product-title-text").text();
String pricestring = ele.select(" span.product-price__wrap").text();
price = Double.parseDouble(pricestring.substring(pricestring.lastIndexOf("$")));
System.out.println(pricestring);
//*******************************************
String store = "Sport Chek";
//Adds the formatted item to an ArrayList of items
results.add(new Item(title, store, price, link));
//Prints the object's to String to console
//For debug purposes, do NOT remove - **Important
System.out.println(results.get(i).toString());
}
} catch (Exception a){
a.printStackTrace();
}
return results;
}
public int getStatus(){
return status;
}
}
The two relevant methods are doInBackground in my AsyncTask and the crunchResults method.
Here is the result I get from using Ctrl+Shift+I on the actual website (Desired Result):
But when running the above code and using a println here is the result that I get for the tag section class="product-grid-wrapper" :
<section class="product-grid-wrapper">
<ul data-module-type="SearchProductGrid" class="product-grid__list product-grid__list_quickview">
<!-- #product-grid__item-template -->
</ul>
</section>
Can anyone help me figure out why I am not getting my desired result?
All help is appreciated
EDIT: for this specific search that the println data was collected from, the link was https://www.sportchek.ca/search.html#q=men+coat&lastVisibleProductNumber=3