I'm trying to build a program that uses proxies to scrape a certain website. I've set up my random proxy generator correctly, but When running the following code:
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType
import random
from string import ascii_lowercase
PATH = r"C:\Users\...\PythonProject\chromedriver.exe"
def proxy_update():
global driver
proxies = []
driver.get('https://free-proxy-list.net/')
proxy_search = driver.find_element_by_xpath("""//*[@id="proxylisttable"]/tbody""").text.split('ago\n')
for pr in proxy_search:
proxies.append(pr.split(' '))
for lst in proxies:
for pr in lst:
if pr == "elite":
proxies.append(lst[0] + ':' + lst[1])
else:
pass
if len(proxies) > 10:
driver.quit()
return random.sample(proxies, k=1)
else:
proxy_update()
driver = webdriver.Chrome(PATH)
PROXY = proxy_update()
webdriver.DesiredCapabilities.CHROME['proxy'] = {
"httpProxy": PROXY,
"ftpProxy": PROXY,
"sslProxy": PROXY,
"proxyType": "MANUAL",
}
webdriver.DesiredCapabilities.CHROME['acceptSslCerts'] = True
proxy_update()
I get an error when trying to connect to wikipedia:
driver.get("https://www.wikipedia.org/wiki/Rotterdam")
Results in:
Traceback (most recent call last):
File "C:\Users\...\venv\lib\site-packages\urllib3\connection.py", line 170, in _new_conn
(self._dns_host, self.port), self.timeout, **extra_kw
File "C:\Users\...\venv\lib\site-packages\urllib3\util\connection.py", line 96, in create_connection
raise err
File "C:\Users\...\venv\lib\site-packages\urllib3\util\connection.py", line 86, in create_connection
sock.connect(sa)
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 706, in urlopen
chunked=chunked,
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 394, in _make_request
conn.request(method, url, **httplib_request_kw)
File "C:\Users\...\venv\lib\site-packages\urllib3\connection.py", line 234, in request
super(HTTPConnection, self).request(method, url, body=body, headers=headers)
File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1262, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1308, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1257, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1028, in _send_output
self.send(msg)
File "C:\Users\...\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 968, in send
self.connect()
File "C:\Users\...\venv\lib\site-packages\urllib3\connection.py", line 200, in connect
conn = self._new_conn()
File "C:\Users\...\venv\lib\site-packages\urllib3\connection.py", line 182, in _new_conn
self, "Failed to establish a new connection: %s" % e
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x000001F755EC47C8>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/.../PythonProject.py", line 46, in <module>
proxy_update()
File "C:/Users/.../PythonProject.py", line 15, in proxy_update
driver.get('https://free-proxy-list.net/')
File "C:\Users\...\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 333, in get
self.execute(Command.GET, {'url': url})
File "C:\Users\...\venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 319, in execute
response = self.command_executor.execute(driver_command, params)
File "C:\Users\...\venv\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 374, in execute
return self._request(command_info[0], url, body=data)
File "C:\Users\...\venv\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 397, in _request
resp = self._conn.request(method, url, body=body, headers=headers)
File "C:\Users\...\venv\lib\site-packages\urllib3\request.py", line 79, in request
method, url, fields=fields, headers=headers, **urlopen_kw
File "C:\Users\...\venv\lib\site-packages\urllib3\request.py", line 170, in request_encode_body
return self.urlopen(method, url, **extra_kw)
File "C:\Users\...\venv\lib\site-packages\urllib3\poolmanager.py", line 375, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 796, in urlopen
**response_kw
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 796, in urlopen
**response_kw
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 796, in urlopen
**response_kw
File "C:\Users\...\venv\lib\site-packages\urllib3\connectionpool.py", line 756, in urlopen
method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
File "C:\Users\...\venv\lib\site-packages\urllib3\util\retry.py", line 573, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='127.0.0.1', port=62467):
Max retries exceeded with url: /session/8d08a1391f64979088c1e0a83cb674b6/url
(Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000011EF27F48C8>:
Failed to establish a new connection: [WinError 10061]
No connection could be made because the target machine actively refused it'))
Does this mean wikipedia is refusing requests from public, free proxy servers? As my proxy is just scraped from a free proxy website. I've heard that these proxies are very well known and actively blocked by sites such as google.