I am trying to grab the URL of each Laptop that is on sale on the first 3 pages of this Amazon page
Every time I run the script, the driver.findElements(By.xpath) returns an inconsistent amount of URLs. The first page is pretty consistent and it return 4 URLs but page 2 and 3 can return anywhere between 1 and 4 URLs even though page 2 has 8 URLs I am looking for and page 3 has 4 URLs I am looking for.
I doubt the problem is in the grabData method since it grabs the data based on the inconsistent URLs list given. I am pretty new to this so I hope that all made sense. Any help would be appreciated. Let me know if you need more clarification
public static String dealURLsXpath = "//span[@data-a-strike=\"true\" or contains(@class,\"text-strike\")][.//text()]/parent::a[@class]";
public static List<String> URLs = new ArrayList<String>();
public static void main(String[] args)
{
//Initialize Browser
System.setProperty("webdriver.chrome.driver", "C:\\Users\\email\\eclipse-workspace\\ChromeDriver 81\\chromedriver.exe");
WebDriver driver = new ChromeDriver();
driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
//Search through laptops and starts at page 1
Search.searchLaptop(driver);
//Grabs data for each deal and updates Products List directly
listingsURL = driver.getCurrentUrl();
//updates the global URLs List with the URLs found by driver.findElements(By.xpath)
updateURLsList(driver);
//Iterates through each URL and grabs laptop information to add to products list
grabData(driver, URLs, "Laptop");
// Clears URLs list so that it can be populated by the URLs in the next page
URLs.clear();
// returns driver to Amazon page to click on "page 2" button to go to next page and repeat process
driver.get(listingsURL);
driver.findElement(By.xpath("//a [contains(@href,'pg_2')]")).click();
listingsURL = driver.getCurrentUrl();
updateURLsList(driver);
grabData(driver, URLs, "Laptop");
URLs.clear();
driver.get(listingsURL);
driver.findElement(By.xpath("//a [contains(@href,'pg_3')]")).click();
listingsURL = driver.getCurrentUrl();
updateURLsList(driver);
grabData(driver, URLs, "Laptop");
URLs.clear();
driver.get(listingsURL);
}
public static void updateURLsList(WebDriver driver)
{
//list of deals on amazon page
/////////////////////////////////////////////INCONSISTENT/////////////////////////////////////////////
List<WebElement> deals = driver.findElements(By.xpath(dealURLsXpath));
//////////////////////////////////////////////////////////////////////////////////////////////////////
System.out.println("Deals Size: " + deals.size());
for(WebElement element : deals)
{
URLs.add(element.getAttribute("href"));
}
System.out.println("URL List size: " + URLs.size());
deals.clear();
}
public static void grabData(WebDriver driver, List<String> URLs, String category)
{
for(String url : URLs)
{
driver.get(url);
String name = driver.findElement(By.xpath("//span [@id = \"productTitle\"]")).getText();
System.out.println("Name: " + name);
String price = driver.findElement(By.xpath("//span [@id = \"priceblock_ourprice\"]")).getText();
System.out.println("price: " + price);
String Xprice = driver.findElement(By.xpath("//span [@class = \"priceBlockStrikePriceString a-text-strike\"]")).getText();
System.out.println("Xprice: " + Xprice);
String picURL = driver.findElement(By.xpath("//img [@data-old-hires]")).getAttribute("src");
System.out.println("picURL: " + picURL);
BufferedImage img;
System.out.println("URL: " + url);
try
{
img = ImageIO.read(new URL(picURL));
products.add(new Product(
name,
Integer.parseInt(price.replaceAll("[^\\d.]", "").replace(".", "").replace(",", "")),
Integer.parseInt(Xprice.replaceAll("[^\\d.]", "").replace(".", "").replace(",", "")),
img,
category,
url));
}
catch(IOException e)
{
System.out.println("Error: " + e.getMessage());
}
}