1

I am new to web scraping and have tried several methods of scraping multiple pages with rvest. Somehow it is still not working: I only get 15 results instead of the 207 products listed in this category. What am I doing wrong?

# Scrape product titles and prices from pages 1-5 of the fonq.nl
# "lichtbronnen" category and try to combine them into a single table.
library(rvest)
# Placeholder value; it is replaced by a list inside the loop below.
all_df<-0
library(data.table)

for(i in 1:5){
  # Build the URL for page i. paste0() has no `sep` argument, so `sep=""` is
  # just pasted on as one more (empty) string; the resulting URL is unaffected.
  url_fonq <- paste0("https://www.fonq.nl/producten/categorie-lichtbronnen/?p=",i,sep="")
  webpage_fonq <- read_html(url_fonq)
  # Values are not auto-printed inside a for loop, so this head() call and the
  # other bare head()/length()/variable expressions below show nothing.
  head(webpage_fonq)

  # Titles: text of every node matching the CSS selector ".product-title".
  product_title_data_html <- html_nodes(webpage_fonq, '.product-title')
  product_title_data <- html_text(product_title_data_html)
  head(product_title_data)
  # Remove newlines, then every space character (including those between words).
  product_title_data<-gsub("\n","",product_title_data)
  product_title_data<-gsub(" ","",product_title_data)
  head(product_title_data)
  length(product_title_data)

  # Prices: same extraction and cleaning, using the ".product-price" selector.
  product_price_data_html <- html_nodes(webpage_fonq, '.product-price')
  product_price_data <- html_text(product_price_data_html)
  head(product_price_data)
  product_price_data<-gsub("\n","",product_price_data)
  product_price_data<-gsub(" ","",product_price_data)
  head(product_price_data)
  product_price_data
  length(product_price_data)
  # One row per product on this page.
  # NOTE(review): relies on both selectors matching the same number of nodes,
  # in the same order, on every page - confirm against the page markup.
  fonq.df <- data.frame(Procuct_title = product_title_data, Price = product_price_data)
  # BUG: this replaces all_df with a one-element list holding only the current
  # page, so after the loop only the rows from page 5 remain. Each page needs
  # its own slot (e.g. all_df[[i]]) in a list created before the loop.
  all_df <-list(fonq.df)
  }

# Row-bind the list of per-page data frames; fill = TRUE pads missing columns.
final2<-rbindlist(all_df,fill = TRUE)

View(final2)
Karima Be
  • 13
  • 2

1 Answer

1

The problem is that you keep only the data scraped from the last page of the website, so only the last 15 products end up stored.

So instead of overwriting the all_df variable in every iteration

all_df <- list(fonq.df)

append the fonq.df data frame to the end of all_df:

all_df <- bind_rows(all_df, fonq.df)

Here is my complete solution:

# Scrape product titles and prices from several pages of the fonq.nl
# "lichtbronnen" category and combine them into one data frame, `all_df`.
library(rvest)
library(dplyr)

# Category listing to scrape; the page number is appended to this URL.
base_url <- "https://www.fonq.nl/producten/categorie-lichtbronnen/?p="

# Pages to fetch.
# NOTE(review): the question reports 207 products and 15 results per page,
# which would need 14 pages rather than 5 - confirm against the site and
# widen this range if the whole category is wanted.
pages <- 1:5

# Remove every newline and every space character from scraped text.
strip_whitespace <- function(x) {
  x <- gsub("\n", "", x, fixed = TRUE)
  gsub(" ", "", x, fixed = TRUE)
}

# Download one listing page and return a data frame with one row per product.
#
# page: page number appended to `base_url`.
# Returns a data frame with character columns `Procuct_title` and `Price`.
# Stops with an error if the two selectors match different numbers of nodes,
# because data.frame() would otherwise fail obscurely or silently recycle
# the shorter vector and misalign titles and prices.
scrape_page <- function(page) {
  webpage <- read_html(paste0(base_url, page))

  titles <- strip_whitespace(html_text(html_nodes(webpage, ".product-title")))
  prices <- strip_whitespace(html_text(html_nodes(webpage, ".product-price")))

  if (length(titles) != length(prices)) {
    stop(
      "Page ", page, ": found ", length(titles), " titles but ",
      length(prices), " prices",
      call. = FALSE
    )
  }

  # Column name spelling ("Procuct_title") is kept as in the original script
  # so that any code reading this column keeps working.
  data.frame(Procuct_title = titles, Price = prices)
}

# Build one data frame per page, then bind them once; this avoids re-copying
# the accumulated result on every iteration.
all_df <- bind_rows(lapply(pages, scrape_page))

View(all_df)
Endre
  • 690
  • 8
  • 15