I was trying to scrape a website to extract the data from many pages using rvest and purrr. but everytime I run the code the "Error in open.connection(x, "rb") : HTTP error 404." appears.
url <- "http://books.toscrape.com/catalogue/page-%d"
map_df(1:10, function(i){
page <- read_html(sprintf(url, i))
cat(".")
booksdf <- data.frame(safely( title <- html_nodes(page, "h3, #title") %>% html_text(),
price <- html_nodes(page, ".price_color") %>% html_text() %>% gsub("£", "", .),
rating <- html_nodes(page, ".star-rating") %>% html_attrs() %>% str_remove("star-rating") %>%str_replace_all(c("One" = "1", "Two" = "2", "Three" = "3", "Four" = "4", "Five" = "5")) %>% as.numeric()
)
)
}
)
Error in open.connection(x, "rb") : HTTP error 404.