I want to grab Google search results with HtmlUnit. My code was working for a while, but now I am getting this exception:
com.gargoylesoftware.htmlunit.ScriptException: 503 Service Unavailable for https://ipv4.google.com/sorry/IndexRedirect?continue=......
It also prints the JavaScript that supposedly failed:
// Quoted from the HtmlUnit ScriptException output: a minified callback that
// inspects the state of `c` and dispatches to QS_* helpers. `a`, `b`, `c` and
// `d` are free variables from an enclosing scope that is not shown here.
// NOTE(review): `c` is presumably an XMLHttpRequest (it exposes readyState,
// status, getResponseHeader and responseText) — confirm against the full script.
function () {
// Nothing is done while readyState is 1.
if (1 != c.readyState) {
// e starts as false (!1); it becomes true when readyState is 4 with status 0,
// or when reading those properties throws.
var e = !1;
try {
e = 0 == c.status && 4 == c.readyState;
}
catch (k) {
e = !0;
}
// f stays undefined unless one of the branches below assigns a code;
// g defaults to d and is passed along with f to QS_gea.
var f, g = d;
// Chained conditional, evaluated in order:
//   e is true                                              -> f = 21
//   QS_kea(readyState, status) is truthy and the Content-Type
//   header does not contain "application/json"             -> f = 12, g = {response, url}
//   QS_lea(status, 400, 500) is truthy                     -> f = 25
//   QS_lea(status, 500, 600) is truthy                     -> f = 1
// NOTE(review): QS_lea looks like a status-range check (4xx / 5xx) — confirm.
e ? f = 21 : QS_kea(c.readyState, c.status) && 0 > (c.getResponseHeader("Content-Type") || "").indexOf("application/json") ? (f = 12, g = {response: c.responseText, url: d}) : QS_lea(c.status, 400, 500) ? f = 25 : QS_lea(c.status, 500, 600) && (f = 1);
if (void 0 !== f) {
// A code was assigned: hand it to QS_gea, then call QS_mea.
// NOTE(review): presumably error reporting followed by cleanup — confirm.
QS_gea(a, f, null, g), QS_mea(a, b);
} else {
if (3 == c.readyState && a.ya && !a.ka) {
// readyState 3 with a.ya set and a.ka unset: pass the response text so far
// to QS_hea and store its result in b.RJ.
b.RJ = QS_hea(a, c.responseText, b.RJ, d);
} else {
if (4 == c.readyState && !b.complete) {
// readyState 4, handled once per b: the complete flag guards re-entry.
b.complete = !0;
// Note: the inner function's parameter `b` and local `c` shadow the outer ones.
// NOTE(review): QS_d appears to be a bind-style helper (function, context, args) — confirm.
var h = QS_d(function (b) {
// Unless a.ka is set, run QS_hea over b.ov.responseText with a trailing `true` argument.
a.ka || (b.RJ = QS_hea(a, b.ov.responseText, b.RJ, b.url, !0));
if (a.ka) {
// a.ka set: schedule a.Aa (wrapped by QS_d together with h and 0) via
// requestAnimationFrame and record the returned handle in a.ra.
var c = QS_d(a.Aa, a, h, 0);
a.ra.push(window.requestAnimationFrame(c));
} else {
QS_mea(a, b);
}
}, a, b);
// Only a 200 status invokes h; any other status goes straight to QS_mea.
200 == c.status ? h() : QS_mea(a, b);
}
}
}
}
}
If I suppress the script errors and the status-code errors, I don't get any results, presumably because the page doesn't load properly.
It also occurred to me that Google might be detecting the scraping and blocking my requests. That would be strange, though, because it would mean the very first attempt is detected right away.