0

I would like to ask why this for loop is not working and how to fix it to web scrape this page:

library(rvest)
library(dplyr)

my_list <- list()

for (i in 2:20) {
  link <- paste0("https://www.immobilienscout24.at/regional/wien/wien/immobilie-kaufen/seite-", i)
  page <- read_html(link)
  
  price <- page %>% 
    html_elements(".tSnnN") %>% 
    html_text()
  
  rooms <- page %>% 
    html_elements(".ufaLY:nth-child(1)") %>% 
    html_text()
  
  m2 <- page %>% 
    html_elements(".ufaLY:nth-child(2)") %>% 
    html_text()
  
  location <- page %>% 
    html_elements(".YqNih") %>% 
    html_text()
  
  my_list[[i]] <- list(price, rooms, m2, location)
  flat <- do.call(rbind.data.frame, my_list)
  colnames(flat) <- c("price", "rooms", "m2", "location")
  print(paste("Page:", i))
  
}

getting error:

Error in (function (..., deparse.level = 1, make.row.names = TRUE, stringsAsFactors = default.stringsAsFactors(), : invalid list argument: all variables should have the same length

Dave2e
  • 22,192
  • 18
  • 42
  • 50
Jaroslav Kotrba
  • 283
  • 1
  • 14
  • 1
    If you run in debug mode, you'll see that by second iteration (i=3), you have 25 rooms, but only 24 surfaces (m2), hence the error : one of the the adds doesn't have a surface field : you should check this before trying to rbind the list – Waldi Jan 26 '22 at 20:55
  • Maybe this question/answer will help: https://stackoverflow.com/questions/56673908/how-do-you-scrape-items-together-so-you-dont-lose-the-index/56675147#56675147 – Dave2e Jan 26 '22 at 23:19
  • Thank you do you know how to fix it in the data frame that it would return NA when the value is not available? – Jaroslav Kotrba Jan 27 '22 at 08:38

0 Answers0