As Cong Ma said in his answer, doing many automated searches on Google will result in Google blocking you, and you'll get error 503. The only Google API for doing searches that currently works is the Google Custom Search API. The problem is that it was designed to search through your own pages. There is an option to set it to search all pages (see this answer), but even then you can make only 100 searches per day. Previously there was the option of using other APIs, such as Bing and Yahoo, but neither of them is free anymore. The only free API that does internet searches is the FAROO API. But there is still one option for doing a Google search: using Selenium WebDriver. Selenium is used to imitate browser usage, and it has options to use the Firefox, Chrome, Edge or Safari webdrivers (it actually opens Chrome and does your search), but this is annoying because you don't actually want to see the browser. There is a solution for this: you can use PhantomJS. Download it from here, extract it, and see how to use it in the example below (I wrote a simple class which you can use; you just need to change the path to PhantomJS):
import time
from urllib.parse import quote_plus
from selenium import webdriver
class Browser:
    """Small wrapper around a Selenium PhantomJS driver for running Google searches.

    Args:
        path: Path to the PhantomJS executable handed to ``webdriver.PhantomJS``.
        initiate: When true, the driver is started right away in the constructor.
        implicit_wait_time: Seconds passed to the driver's ``implicitly_wait``.
        explicit_wait_time: Default number of seconds to sleep after a page load.

    The instance can also be used as a context manager; leaving the ``with``
    block calls :meth:`end`.
    """

    def __init__(self, path, initiate=True, implicit_wait_time=10, explicit_wait_time=2):
        self.path = path
        # Implicit vs. explicit waits:
        # http://www.aptuz.com/blog/selenium-implicit-vs-explicit-waits/
        self.implicit_wait_time = implicit_wait_time
        self.explicit_wait_time = explicit_wait_time
        # Stays None until start() is called, so end() is safe on an unstarted browser.
        self.driver = None
        if initiate:
            self.start()

    def __enter__(self):
        """Start the driver if it is not running yet and return this instance."""
        if self.driver is None:
            self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the driver when leaving the ``with`` block; exceptions propagate."""
        self.end()
        return False

    def start(self):
        """Create the PhantomJS driver and apply the implicit wait."""
        # Use the path stored on the instance; relying on a module-level
        # ``path`` variable raises NameError whenever that global is absent.
        self.driver = webdriver.PhantomJS(self.path)
        self.driver.implicitly_wait(self.implicit_wait_time)

    def end(self):
        """Quit the driver. Does nothing if no driver is currently running."""
        if self.driver is not None:
            self.driver.quit()
            self.driver = None

    def go_to_url(self, url, wait_time=None):
        """Load ``url`` in the driver, then sleep for ``wait_time`` seconds.

        Args:
            url: Address to open.
            wait_time: Seconds to sleep after the page load; falls back to
                ``explicit_wait_time`` when None.
        """
        if wait_time is None:
            wait_time = self.explicit_wait_time
        self.driver.get(url)
        print('[*] Fetching results from: {}'.format(url))
        time.sleep(wait_time)

    def get_search_url(self, query, page_num=0, per_page=10, lang='en'):
        """Build the Google search URL for ``query``.

        Args:
            query: Raw search string; it is URL-encoded with ``quote_plus``.
            page_num: Zero-based result page index.
            per_page: Number of results requested per page.
            lang: Language code sent in the ``hl`` parameter.

        Returns:
            The complete search URL as a string.
        """
        encoded_query = quote_plus(query)
        # ``hl`` is Google's interface-language parameter (was mistyped as ``nl``).
        return 'https://www.google.hr/search?q={}&num={}&start={}&hl={}'.format(
            encoded_query, per_page, page_num * per_page, lang)

    def scrape(self):
        """Collect result links from the page currently loaded in the driver.

        Returns:
            A list of dicts with the keys ``'url'`` and ``'title'``.
        """
        # The XPath might change in the future: it selects every link inside
        # an <h3> tag with class "r".
        links = self.driver.find_elements_by_xpath("//h3[@class='r']/a[@href]")
        return [
            {'url': link.get_attribute('href'), 'title': link.text}
            for link in links
        ]

    def search(self, query, page_num=0, per_page=10, lang='en', wait_time=None):
        """Run a search and return the scraped results.

        Args:
            query: Raw search string.
            page_num: Zero-based result page index.
            per_page: Number of results requested per page.
            lang: Language code forwarded to :meth:`get_search_url`.
            wait_time: Seconds to sleep after the page load; None means the
                default handled by :meth:`go_to_url`.

        Returns:
            The list produced by :meth:`scrape`.
        """
        url = self.get_search_url(query, page_num, per_page, lang)
        # go_to_url substitutes explicit_wait_time when wait_time is None.
        self.go_to_url(url, wait_time)
        return self.scrape()
# Example usage: set this to the location of your PhantomJS executable.
path = '<YOUR PATH TO PHANTOMJS>/phantomjs-2.1.1-windows/bin/phantomjs.exe'
br = Browser(path)
try:
    results = br.search('site:facebook.com inurl:login')
    for r in results:
        print(r)
finally:
    # Always quit the browser, even when the search or printing fails,
    # so no PhantomJS process is left running.
    br.end()