I want to scrape a website using Google Apps Script. Unfortunately, it is giving me a 406 error.
Below are the full details of the error:
resp:
"<html><head><title>Error 406 - Not Acceptable</title><head><body><h1>Error 406 - Not Acceptable</h1><p>Generally a 406 error is caused because a request has been blocked by Mod Security. If you belie…"
Below is a sample of the code:
// Handle to the active spreadsheet; nothing in the visible part of this sample reads it.
var ss = SpreadsheetApp.getActiveSpreadsheet();
/**
 * Request options handed to UrlFetchApp.fetch().
 *
 * HTTP request headers have to live under `headers`; the content type has its
 * own `contentType` field. The User-Agent value is kept on a single line
 * because a plain quoted string literal cannot contain a raw line break.
 */
var options = {
  'method': 'get',
  // Return the error response instead of throwing on non-2xx status codes.
  'muteHttpExceptions': true,
  'contentType': 'application/json',
  'headers': {
    // NOTE(review): Apps Script is reported to send its own User-Agent
    // regardless of this value - confirm before relying on it.
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Authorization': 'Bearer ?????',
    // NOTE(review): the script parses the response as HTML, so a JSON-only
    // Accept header may itself be rejected by the server - confirm.
    'Accept': 'application/json',
  },
};
/**
 * Walks listing pages 1 and 2, collects the href of every element matching
 * "#titlo > strong > a", and gathers what scrapeJobDetails() yields for each
 * of those links.
 *
 * A page that does not answer with a 2xx status is logged and skipped: with
 * `muteHttpExceptions` enabled UrlFetchApp returns the error response (for
 * example the 406 page) instead of throwing, and that body is not a listing.
 *
 * @returns {Array} Every item spread out of the non-null scrapeJobDetails() results.
 */
function scrapeJobs() {
  const lastPage = 2;
  const result = [];
  for (let page = 1; page <= lastPage; page++) {
    const url = `https://www.xxxxxxx/page/${page}`;
    const response = UrlFetchApp.fetch(url, options);
    const status = response.getResponseCode();
    if (status < 200 || status >= 300) {
      console.log(`Skipping ${url}: HTTP ${status}`);
      continue;
    }
    const $ = Cheerio.load(response.getContentText());
    const jobList = $('#titlo > strong > a');
    // The callback relies on `this` being the current element, so it must
    // stay a regular function rather than an arrow function.
    const urls = jobList.map(function () {
      return $(this).attr('href');
    }).toArray();
    // debug code - outputs the urls it collected
    console.log(urls);
    for (const jobUrl of urls) {
      const data = scrapeJobDetails(jobUrl);
      // Skip links for which no details came back (null or undefined).
      if (data != null) {
        result.push(...data);
      }
    }
  }
  // NOTE(review): the original sample ended before the function was closed;
  // returning the collected items is assumed - confirm against the full script.
  return result;
}
I tried following the answer to a similar question asked here, but without success. See the link below: