I have a list of links. Many of them are giving errors like 404: page note found and many are not getting connected ie timeout error. They might give all different kinds of error, I have not checked all. I want to separate good links which are working and bad links which give any kind of error. I tried to filter out the connection timed out links like this:
import requests
r=requests.get("http://www.carillionplc.co.uk/sustain/f_con2.htm", timeout=10)
But I got the following error:
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/urllib3/connection.py in _new_conn(self)
158 conn = connection.create_connection(
--> 159 (self._dns_host, self.port), self.timeout, **extra_kw)
160
~/anaconda3/lib/python3.7/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
79 if err is not None:
---> 80 raise err
81
~/anaconda3/lib/python3.7/site-packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
69 sock.bind(source_address)
---> 70 sock.connect(sa)
71 return sock
timeout: timed out
During handling of the above exception, another exception occurred:
ConnectTimeoutError Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
353 else:
--> 354 conn.request(method, url, **httplib_request_kw)
355
~/anaconda3/lib/python3.7/http/client.py in request(self, method, url, body, headers, encode_chunked)
1228 """Send a complete request to the server."""
-> 1229 self._send_request(method, url, body, headers, encode_chunked)
1230
~/anaconda3/lib/python3.7/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
1274 body = _encode(body, 'body')
-> 1275 self.endheaders(body, encode_chunked=encode_chunked)
1276
~/anaconda3/lib/python3.7/http/client.py in endheaders(self, message_body, encode_chunked)
1223 raise CannotSendHeader()
-> 1224 self._send_output(message_body, encode_chunked=encode_chunked)
1225
~/anaconda3/lib/python3.7/http/client.py in _send_output(self, message_body, encode_chunked)
1015 del self._buffer[:]
-> 1016 self.send(msg)
1017
~/anaconda3/lib/python3.7/http/client.py in send(self, data)
955 if self.auto_open:
--> 956 self.connect()
957 else:
~/anaconda3/lib/python3.7/site-packages/urllib3/connection.py in connect(self)
180 def connect(self):
--> 181 conn = self._new_conn()
182 self._prepare_conn(conn)
~/anaconda3/lib/python3.7/site-packages/urllib3/connection.py in _new_conn(self)
163 self, "Connection to %s timed out. (connect timeout=%s)" %
--> 164 (self.host, self.timeout))
165
ConnectTimeoutError: (<urllib3.connection.HTTPConnection object at 0x7f7c86c14ba8>, 'Connection to www.carillionplc.co.uk timed out. (connect timeout=10)')
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~/anaconda3/lib/python3.7/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
~/anaconda3/lib/python3.7/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
398 if new_retry.is_exhausted():
--> 399 raise MaxRetryError(_pool, url, error or ResponseError(cause))
400
MaxRetryError: HTTPConnectionPool(host='www.carillionplc.co.uk', port=80): Max retries exceeded with url: /sustain/f_con2.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f7c86c14ba8>, 'Connection to www.carillionplc.co.uk timed out. (connect timeout=10)'))
During handling of the above exception, another exception occurred:
ConnectTimeout Traceback (most recent call last)
<ipython-input-6-425ceeca52ad> in <module>
1 import requests
----> 2 r=requests.get("http://www.carillionplc.co.uk/sustain/f_con2.htm", timeout=10)
~/anaconda3/lib/python3.7/site-packages/requests/api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
~/anaconda3/lib/python3.7/site-packages/requests/api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
~/anaconda3/lib/python3.7/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
~/anaconda3/lib/python3.7/site-packages/requests/sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
~/anaconda3/lib/python3.7/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
502 # TODO: Remove this in 3.0.0: see #2811
503 if not isinstance(e.reason, NewConnectionError):
--> 504 raise ConnectTimeout(e, request=request)
505
506 if isinstance(e.reason, ResponseError):
ConnectTimeout: HTTPConnectionPool(host='www.carillionplc.co.uk', port=80): Max retries exceeded with url: /sustain/f_con2.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f7c86c14ba8>, 'Connection to www.carillionplc.co.uk timed out. (connect timeout=10)'))
Is there a simple way to just filter out the working links.