I'm working on a Google Chrome extension, part of whose purpose is to gather all of the Amazon user reviews for a specific product listed on Amazon.com or Amazon.co.uk, so that they can be manipulated. Amazon's reviews are not all stored under one URL; instead, Amazon lists up to 10 reviews per page. My initial approach was to use the Fetch API to gather the 10 reviews on a given page, add them to an array, and then continue on to the next page, stopping when no next page is defined. The only problem with this is that it can require hundreds of fetch calls per product, which takes a long time to complete.
let contentArray = [];
let reviewArray = [];

function collectProductComments(){
    let parser = new DOMParser();
    let url = document.getElementsByClassName("a-link-emphasis")[0].href; //Link to the first page of reviews
    getFirstTen(url, parser);
}
function getFirstTen(url, parser){ //Collects the initial 10 elements containing a user review of a specific product
    if(isGood(url)){
        fetch(url) //Fetch the page specified by 'url'
            .then(response => response.text()) //Read the response as text
            .then(data => {
                console.log("Collecting reviews...");
                let doc = parser.parseFromString(data, "text/html"); //Parse the response into a DOM
                let reviews = doc.getElementsByClassName("review");
                for(let i = 0; i < reviews.length; i++){
                    reviewArray.push(reviews[i]); //Iterate through the reviews, appending them to the array
                }
                if(doc.getElementById("cm_cr-pagination_bar") !== null){ //Check whether a pagination bar exists
                    nextURL(doc, parser); //Handle the next pages
                }else{
                    collectionResolved(); //If no pagination bar exists, treat all reviews as collected
                }
            })
            .catch(function(error){
                console.log(error);
            });
    }
}
function nextURL(doc, parser){
    let url = doc.getElementById("cm_cr-pagination_bar").children[0].lastChild.children[0].href; //Get the URL of the page containing the next 10 reviews
    if(isGood(url)){ //If the next page exists...
        fetch(url)
            .then(response => response.text()) //Read the response as text
            .then(data => {
                doc = parser.parseFromString(data, "text/html"); //Parse the response into a DOM
                let reviews = doc.getElementsByClassName("review");
                for(let i = 0; i < reviews.length; i++){
                    reviewArray.push(reviews[i]); //Iterate through the reviews, appending them to the array
                }
                nextURL(doc, parser); //Assume there is a next page
            })
            .catch(function(error){
                console.log(error);
            });
    }else{ //Fired when there is no next page to check
        collectionResolved(); //Treat all reviews as collected
    }
}
function collectionResolved(){
    console.log("Review collection resolved.");
    contentArray = handleReviews(reviewArray); //Logic for searching through the DOM of the collected reviews
    console.log(contentArray);
    saveReviews(contentArray);
}
function isGood(url){
    return url != undefined; //A missing href means there is no next page
}
function handleReviews(elementsToCheck){
    let tempContentArray = [];
    for(let i = 0; i < elementsToCheck.length; i++){
        tempContentArray[i] = [getUser(elementsToCheck[i]), getTitle(elementsToCheck[i]), getComment(elementsToCheck[i])]; //Dissect each review element into its text parts
    }
    return tempContentArray;
}
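I've omitted getUser, getTitle and getComment from the snippet; they just pull the relevant text out of each review element, along roughly the lines of the simplified sketch below. The textOf helper is hypothetical, and the class names are only my reading of Amazon's current markup, so treat them as placeholders:

    //Rough shape of the extraction helpers; textOf is a hypothetical utility, and
    //the class names are assumptions about Amazon's markup that may need adjusting.
    function textOf(review, className){
        let el = review.getElementsByClassName(className)[0];
        return el ? el.textContent.trim() : ""; //Fall back to an empty string if the element is missing
    }
    function getUser(review){ return textOf(review, "a-profile-name"); }
    function getTitle(review){ return textOf(review, "review-title"); }
    function getComment(review){ return textOf(review, "review-text"); }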
I'm very new to this sort of thing, so please feel free to suggest corrections or improvements, or to point out any instances of bad practice.
Does anyone know of a way to optimise this code, or of a better approach to achieving the same result?
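One direction I've been considering, but haven't tried, is parallelising the fetches, since the sequential chain of requests seems to be the main bottleneck. Here's an untested sketch of the idea; it assumes the review pages can be addressed directly through a pageNumber query parameter and that the total number of reviews can be read from the first page, neither of which I've verified:

    //Untested sketch: fetch all review pages concurrently instead of chaining them.
    //Assumes pages are addressable as <baseURL>?pageNumber=N (hypothetical) and
    //that Amazon serves up to 10 reviews per page.
    function fetchReviewPage(baseURL, pageNumber){
        let parser = new DOMParser();
        return fetch(baseURL + "?pageNumber=" + pageNumber)
            .then(response => response.text())
            .then(data => {
                let doc = parser.parseFromString(data, "text/html");
                return Array.from(doc.getElementsByClassName("review"));
            });
    }

    function collectAllReviews(baseURL, totalReviews){
        let totalPages = Math.ceil(totalReviews / 10);
        let pagePromises = [];
        for(let page = 1; page <= totalPages; page++){
            pagePromises.push(fetchReviewPage(baseURL, page));
        }
        //Wait for every page, then flatten the per-page arrays into one
        return Promise.all(pagePromises).then(pages => pages.flat());
    }

I don't know how Amazon responds to that many simultaneous requests, though, so the fetches might need to be sent in smaller batches.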