I'm working on a Google Chrome extension, part of whose purpose is to gather all of the Amazon user reviews for a specific product listed on Amazon.com or Amazon.co.uk, so that they can be manipulated. Amazon's reviews are not all stored under one URL; instead, Amazon lists up to 10 reviews per page. My initial approach was to use the Fetch API to gather the 10 reviews on a given page, add them to an array, and then continue on to the next page, stopping when no next page is defined. The only problem with this is that it can require hundreds of fetch calls per product, which takes a long time to complete.
let contentArray = [];
let reviewArray = [];

function collectProductComments(){
    let parser = new DOMParser();
    let url = document.getElementsByClassName("a-link-emphasis")[0].href; //Link to the first page of reviews
    getFirstTen(url, parser);
}
function getFirstTen(url, parser){ //Collects the initial 10 elements containing a user review of a specific product
    if(isGood(url)){
        fetch(url) //Fetch the page specified by 'url'
            .then(response => response.text()) //Read the response as text
            .then(data => {
                console.log("Collecting reviews...");
                let doc = parser.parseFromString(data, "text/html"); //Parse the response into a DOM
                let reviews = doc.getElementsByClassName("review");
                for(let i = 0; i < reviews.length; i++){
                    reviewArray.push(reviews[i]); //Iterate through the reviews, appending them to the array
                }
                if(doc.getElementById("cm_cr-pagination_bar") !== null){ //Check whether a pagination bar exists
                    nextURL(doc, parser); //Handle the next pages
                }else{
                    collectionResolved(); //If no pagination bar exists, treat all reviews as collected
                }
            })
            .catch(function(error){
                console.log(error);
            });
    }
}
function nextURL(doc, parser){
    let url = doc.getElementById("cm_cr-pagination_bar").children[0].lastChild.children[0].href; //Get the URL of the page containing the next 10 reviews
    if(isGood(url)){ //If the next page exists...
        fetch(url)
            .then(response => response.text()) //Read the response as text
            .then(data => {
                doc = parser.parseFromString(data, "text/html"); //Parse the response into a DOM
                let reviews = doc.getElementsByClassName("review");
                for(let i = 0; i < reviews.length; i++){
                    reviewArray.push(reviews[i]); //Iterate through the reviews, appending them to the array
                }
                nextURL(doc, parser); //Assume there is a next page
            })
            .catch(function(error){
                console.log(error);
            });
    }else{ //Fired when there is no next page to check
        collectionResolved(); //Treat all reviews as collected
    }
}
function collectionResolved(){
    console.log("Review collection resolved.");
    contentArray = handleReviews(reviewArray); //Logic for searching through the DOM of the collected reviews
    console.log(contentArray);
    saveReviews(contentArray);
}
function isGood(url){
    return url != undefined; //A missing href means there is no next page
}
function handleReviews(elementsToCheck){
    let tempContentArray = [];
    for(let i = 0; i < elementsToCheck.length; i++){
        tempContentArray[i] = [getUser(elementsToCheck[i]), getTitle(elementsToCheck[i]), getComment(elementsToCheck[i])]; //Dissect each review element into its text parts
    }
    return tempContentArray;
}
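I've omitted getUser, getTitle and getComment from the snippet; they just pull the relevant text out of each review element, along roughly the lines of the simplified sketch below. The textOf helper is hypothetical, and the class names are only my reading of Amazon's current markup, so treat them as placeholders:

    //Rough shape of the extraction helpers; textOf is a hypothetical utility, and
    //the class names are assumptions about Amazon's markup that may need adjusting.
    function textOf(review, className){
        let el = review.getElementsByClassName(className)[0];
        return el ? el.textContent.trim() : ""; //Fall back to an empty string if the element is missing
    }
    function getUser(review){ return textOf(review, "a-profile-name"); }
    function getTitle(review){ return textOf(review, "review-title"); }
    function getComment(review){ return textOf(review, "review-text"); }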
I'm very new to this sort of thing, so please feel free to suggest corrections or improvements, or to point out any instances of bad practice.
Does anyone know of a way to optimise this code, or of a better approach to achieving the same result?
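One direction I've been considering, but haven't tried, is parallelising the fetches, since the sequential chain of requests seems to be the main bottleneck. Here's an untested sketch of the idea; it assumes the review pages can be addressed directly through a pageNumber query parameter and that the total number of reviews can be read from the first page, neither of which I've verified:

    //Untested sketch: fetch all review pages concurrently instead of chaining them.
    //Assumes pages are addressable as <baseURL>?pageNumber=N (hypothetical) and
    //that Amazon serves up to 10 reviews per page.
    function fetchReviewPage(baseURL, pageNumber){
        let parser = new DOMParser();
        return fetch(baseURL + "?pageNumber=" + pageNumber)
            .then(response => response.text())
            .then(data => {
                let doc = parser.parseFromString(data, "text/html");
                return Array.from(doc.getElementsByClassName("review"));
            });
    }

    function collectAllReviews(baseURL, totalReviews){
        let totalPages = Math.ceil(totalReviews / 10);
        let pagePromises = [];
        for(let page = 1; page <= totalPages; page++){
            pagePromises.push(fetchReviewPage(baseURL, page));
        }
        //Wait for every page, then flatten the per-page arrays into one
        return Promise.all(pagePromises).then(pages => pages.flat());
    }

I don't know how Amazon responds to that many simultaneous requests, though, so the fetches might need to be sent in smaller batches.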