I have this code, what I want is to wait for the Promise.all
after the comment EXECUTION to be finished and then proceed to do other task. Notice that I'm using PromiseJS, not BlueBird. I've searched questions about await/async but none of them work for me. Sorry if you feel the code is long as I want to expose it all. Maybe without doing this some of you might say "maybe there is a bug somewhere ".
// ================ load the lib ================
const curl = require("curl");
const jsdom = require('jsdom');
const cheerio = require('cheerio');
const Promise = require("promise");
// ================ declare global constants ================
const domain = "https://www.bankmega.com";
var url = domain + "/ajax.promolainnya.php";
const categories = [1, 2, 3, 4, 5, 6]; // hard-code subcat but later can be changed
// simply by loading the main page, then get all the subcat
// and convert to an appropriate integer array representing the
// categories
var visited = new Set(); // store visited links (not to scrap an item twice)
// ================ declare methods ================
function getItemLinksOfCat(url, subcat) {
const subCatURL = url + "?product=&subcat=" + subcat;
curl.get(subCatURL, null, (err, resp, body) => {
const {JSDOM} = jsdom;
const dom = new JSDOM(body);
const $ = (require("jquery"))(dom.window);
var tds = $("table[class=tablepaging] tr").children();
var maxPage = getMaxPage(tds, $);
var itemLinks = getItemLinks(maxPage, $, subcat);
// itemLinks.forEach(itemLink => {
// if (!visited.has(itemLink)) {
// visited.add(itemLink);
// scrapItem(itemLink);
// }
// });
Promise.all(itemLinks.map(function(itemLink) {
if (!visited.has(itemLink)) {
visited.add(itemLink);
scrapItem(itemLink);
}
}));
});
}
function getItemLinks(maxPage, $, subcat) {
var itemLinks = [];
var product = "";
for (var i = 1; i <= maxPage; ++i) {
var page = i;
$("#contentpromolain2").load("ajax.promolainnya.php?product="+product+"&subcat="+subcat+"&page="+page);
var lis = $("ul#promolain").children();
for (var j = 0; j < lis.length; ++j) {
var itemLink = $(lis[j]).find("a").attr("href");
itemLinks.push(itemLink);
}
}
return itemLinks;
}
function getMaxPage(tds, $) {
var maxPage = -1;
for(var i = 0; i < tds.length; ++i ){
var td = $(tds[i]);
var page = parseInt(td.text());
if (page != NaN && page > maxPage) {
maxPage = page;
}
}
return maxPage;
}
/*
Using wrapper method might be useful in the future
As we can redirect a call to an appropriate method
that can handle a specific type of item
*/
function scrapItem(itemLink) {
if(itemLink.includes("promo_detail")) {
scrapPromoDetail(itemLink);
}
}
/*
Actual method to scrap promo item
We can have other methods to scrap other types of item
*/
function scrapPromoDetail(itemLink) {
itemLink = domain + "/" + itemLink;
curl.get(itemLink, null, (err, resp, body) => {
if (resp != undefined && resp.statusCode == 200) {
var s = parseItemHTMLToString(body, itemLink);
console.log(s);
console.log("========");
}
});
}
/*
Helper function to parse item's html to string
Return a string contains item's property-value pairs
*/
function parseItemHTMLToString(html, itemLink) {
const $ = cheerio.load(html);
var promoSection = $("div#contentpromolain2");
var promoProperties = promoSection.find("div");
var dict = {};
for (var i = 0; i < promoProperties.length; ++i) {
var div = $(promoProperties[i]);
var klass = div.attr("class");
var text = div.text().trim();
if (klass !== undefined) {
if (klass === "titleinside") { // title
dict[klass] = text;
} else {
if (klass === "periode" || klass === "area" ) { // other props
var token = text.split(":");
text = token[1];
if (klass === "periode") {
token = text.split("-");
for(var j = 0; j < token.length; ++j) {
token[j] = token[j].trim();
}
dict[klass] = token.join(" - ");
} else { // area
dict[klass] = text;
}
} else if (klass === "keteranganinside") { // promo image
dict[klass] = domain + div.find("img").attr("src").trim();
} else { // other props
dict[klass] = text;
}
}
}
}
return dict;
}
// ================ EXECUTION ================
Promise.all(categories.map(function(subcat) {
getItemLinksOfCat(url, subcat)
}));
// do other tasks after Promise.all
EDIT 1 I've tried this:
// ================ EXECUTION ================
async function ttt() {
await Promise.all(categories.map(function(subcat) {
getItemLinksOfCat(url, subcat)
}));
// do other tasks after Promise.allc
}
ttt().then( result => {
console.log("Finish");
});
but it didn't work.
Here's the portion of the output:
Finish
{ titleinside: 'Mega Cellular - Free Tempered Glass',
area: ' Pontianak',
periode: '20 Juli 2018 - 18 Oktober 2018',
keteranganinside:
'https://www.bankmega.com/files/images/00-landing-page-MEGACELL.jpg' }
========
EDIT 2 Hi HoldOffHunder, you mean this?
// ================ EXECUTION ================
async function test() {
await Promise.all(categories.map(function(subcat) {
getItemLinksOfCat(url, subcat)
}));
// do other tasks after Promise.allc
console.log("???");
}
test();
It also printed out "???" before running.