I've created a script to scrape two fields from a webpage using Google Apps Script. The script seems to be working properly, but it writes the results to a spreadsheet one row at a time.
What I did was grab the links to all the shops from the landing page and then reuse those links in another function to parse the shop name and website link from the inner pages. Although the two fields are also available on the landing page, I want to parse them from the inner pages.
However, I would like to write the results to the spreadsheet as a single batch update. How can I achieve that?
Current approach:
function parseYellowpages() {
  var options = {
    "method": "GET",
    "headers": {
      "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
    }
  };
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var activeSheet = ss.getSheetByName('Sheet1');
  var base = "https://www.yellowpages.com";
  var webURL = "https://www.yellowpages.com/search?search_terms=pizza&geo_location_terms=New+York%2C+NY";
  var row = 1;
  var response = UrlFetchApp.fetch(webURL, options);
  var $ = Cheerio.load(response.getContentText());
  const items = $("[class='result'] [class='info']");
  for (var i = 0; i < items.length; i++) {
    // Build the absolute link to the shop's inner page
    var shopLink = base + $(items[i]).find('h2[class="n"] > a.business-name').attr('href');
    var resultContainer = getInnerpageInfo(shopLink, options);
    // Writes one cell at a time -- this is the per-row approach I want to replace
    activeSheet.getRange(row, 1).setValue(resultContainer[0]);
    activeSheet.getRange(row, 2).setValue(resultContainer[1]);
    console.log(resultContainer[0], resultContainer[1]);
    row++;
  }
}

function getInnerpageInfo(innerLink, options) {
  var response = UrlFetchApp.fetch(innerLink, options);
  var $ = Cheerio.load(response.getContentText());
  var shopName = $('.sales-info > h1').first().text();
  var website = $('a.website-link').first().attr('href');
  return [shopName, website];
}
How can I write the results to the spreadsheet in one batch update?
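For reference, this is a minimal sketch of the direction I'm thinking of (untested), assuming setValues() on a Range is the right tool: keep the same selectors and the existing getInnerpageInfo() helper, collect each [shopName, website] pair into an array, and write everything with a single call at the end. The hypothetical function name parseYellowpagesBatch is just for illustration.

function parseYellowpagesBatch() {
  var options = {
    "method": "GET",
    "headers": {
      "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36"
    }
  };
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var activeSheet = ss.getSheetByName('Sheet1');
  var base = "https://www.yellowpages.com";
  var webURL = "https://www.yellowpages.com/search?search_terms=pizza&geo_location_terms=New+York%2C+NY";

  var response = UrlFetchApp.fetch(webURL, options);
  var $ = Cheerio.load(response.getContentText());
  var items = $("[class='result'] [class='info']");

  // Accumulate one [shopName, website] row per shop instead of writing immediately
  var results = [];
  for (var i = 0; i < items.length; i++) {
    var shopLink = base + $(items[i]).find('h2[class="n"] > a.business-name').attr('href');
    results.push(getInnerpageInfo(shopLink, options));
  }

  // Single batch write: the range dimensions must match the 2-D array (rows x 2 columns)
  if (results.length > 0) {
    activeSheet.getRange(1, 1, results.length, 2).setValues(results);
  }
}

If the per-page fetches turn out to be the slow part rather than the writes, I assume UrlFetchApp.fetchAll() could batch those as well, but that would require restructuring getInnerpageInfo() more substantially.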