I want to use JSoup to extract some data from the reviews section on Amazon, and then store the data in a HashMap.
For a given Amazon product, I want to extract some of the reviewers' names and impact. A reviewer's impact is a number available on the reviewer's public profile page.
Extracting the reviewers' names works fine but I'm having a problem extracting the impact (see code and error message below).
Thanks for any help!
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.HashMap;
public class Question {
public static void main(String[] args) throws IOException {
HashMap<String, String> reviewers = new HashMap<String, String>();
Document reviewPage = Jsoup.connect("https://www.amazon.co.uk/Charles-Dickens-Complete-Christmas-Collection/dp/B08FRBWTNX/ref=sr_1_1_sspa?crid=USM5FCL8WJZ4&keywords=charles+dickens&qid=1678359627&sprefix=charles+dickens%2Caps%2C127&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&psc=1").get();
Elements reviewPageElements = reviewPage.select(".review");
for (Element reviewPageElement : reviewPageElements) {
// reviewer's name
Element nameElement = reviewPageElement.getElementsByClass("a-profile-name").first();
String name = nameElement.text();
// reviwer's profile page
Element linkElement = reviewPageElement.getElementsByClass("a-profile").first();
String link = linkElement.attr("href");
String url = "https://www.amazon.co.uk" + link;
// reviwer's impact
Document profilePage = Jsoup.connect(url).get();
Elements impactElement = profilePage.getElementsByClass("impact-text");
String impact = impactElement.text();
reviewers.put(name, impact);
}
}
}
ERROR MESSAGE:
Exception in thread "main" org.jsoup.HttpStatusException: HTTP error fetching URL. Status=503, URL=https://www.amazon.co.uk/Charles-Dickens-Complete-Christmas-Collection/dp/B08FRBWTNX/ref=sr_1_1_sspa?crid=USM5FCL8WJZ4&keywords=charles+dickens&qid=1678359627&sprefix=charles+dickens%2Caps%2C127&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&psc=1
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:459)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:475)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:434)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:181)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:170)
at Question.main(Question.java:48)