I'm very exited to be working on a nodeJS REST API project that provides top sales data about the largest ecommerce giant in Latin America (MercadoLibre) by webscraping the heck out of it with Cheerio which is like jQuery for the backend
like many others they use a non specific number of nested categories to organize their products, and im trying to map the hole category tree automatically to get back every category's "most sold" list to then save it and provide it as an API.
My problem is that the recuring functions don't seem await fetching with Axios but let me explain how it works first so you can follow along
The first thing I did was build a sort of kickstart function, that would map the main category tree to then call the recursive functions to iterate over every (n)*sub-categories.... like the one down below
const categoriasURL = 'https://www.mercadolibre.com.ar/categorias#nav-header'
async function checkCategories() {
try {
let response = await axios(categoriasURL);
let html = response.data
let $ = cheerio.load(html)
let categories = {}
let subcategories = {}
$('.categories__container', html).each( async function () {
let categoryName = lodash.kebabCase(lodash.deburr($(this).find('.categories__title a').text()))
$(this).children().find('.categories__item').each(async function(){
let subcategory = lodash.kebabCase(lodash.deburr($(this).find('a').text()))
let subcatList
try {
let subcategoryFetch = await axios($(this).find('a').attr('href'));
console.log('CAT ' + $(this).find('a').attr('href'))
subcatList = await checkSubcategories (subcategoryFetch)
} catch (error) {
console.log("error en checkCategories")
}
})
})
} catch (error) {
console.log(error)
}
}
after maping the main categories the function above calls checkSubcategories (subcategoryFetch)
to check if there are more subcategories to be checked and if so it will call fetchSubcat(url)
to fetch them and call checkSubcategories (subcategoryFetch)
function again and so on and so forth
checkSubcategories (fetched):
async function checkSubcategories (fetched){
let html = fetched.data
let subcatList = {}
$ = cheerio.load(html)
if ($('.ui-search-filter-dl .ui-search-filter-dt-title:contains("Categorías")').length) {
$('.ui-search-filter-dl .ui-search-filter-dt-title:contains("Categorías")').parent().children().find('.ui-search-filter-container').each( async function(){
let subcategoryName = lodash.kebabCase(lodash.deburr($(this).find('.ui-search-filter-name').text()))
try {
let url = await $(this).find('a').attr('href')
let fetchedData = await fetchSubcat(url)
//
} catch (error) {
console.log("error en checkSubcategories")
}
})
} else {
return {moreSubcat: false, url:"url"}
}
}
fetchSubcat(url):
async function fetchSubcat(url){
try {
let subcategoryFetch = await axios(url);
let subcatList
subcatList = await checkSubcategories (subcategoryFetch)
return subcatList
} catch (error) {
console.log('error en fetchSubcat')
}
}