Good afternoon, colleagues. I have been stuck on this problem for quite some time. I tried many options, my friends and I tested the behaviour together, and we even wrote to the developers of the library, but no solution has been found. The library in question is requests in Python. When a request is sent through an (anonymous) proxy server to sites that report your external IP, my own IP is returned. I approached the question deliberately and combined knowledge I have picked up here on Stack Overflow.
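To state the symptom concretely, here is a minimal sketch of the scenario (the proxy address below is a placeholder):

    import requests

    # placeholder proxy; the dict key is matched against the scheme of the target URL
    proxies = {"http": "http://1.2.3.4:8080", "https": "http://1.2.3.4:8080"}
    print(requests.get("https://ifconfig.me/", proxies=proxies, timeout=10).text)

With a working anonymous proxy I would expect the proxy's IP to be printed, but I see my own.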
My code contains a function to check my external IP:
def check_my_ip(
        header=None,
        use_proxy: bool = False,
        proxy_dict=None):
    my_ip: str = ""
    message = []
    flag = False
    # only route through the proxy when the caller asks for it
    proxies = proxy_dict if use_proxy else None
    try:
        my_ip = requests.get(url='https://ifconfig.me/', headers=header, proxies=proxies, verify=False)
        my_ip = my_ip.text
        if len(my_ip) > 15: my_ip = ""  # longer than a bare IPv4 address, so not an IP
    except Exception:
        my_ip = ""
    if my_ip == "":
        try:
            my_ip = requests.get('https://ramziv.com/ip', headers=header, proxies=proxies, verify=False).text
            if len(my_ip) > 15: my_ip = ""
        except Exception:
            my_ip = ""
    if my_ip == "":
        try:
            s = requests.get('https://2ip.ua/ru/', headers=header, proxies=proxies, verify=False)
            b = BeautifulSoup(s.text, "html.parser")
            b = b.select(".ipblockgradient .ip")[0].getText()
            # re.search returns a Match object, not a string
            match = re.search(r"(\d{1,3}\.){3}\d{1,3}", b)
            my_ip = match.group(0) if match else ""
        except Exception:
            my_ip = ""
    if my_ip != "":
        print("Received IP: " + my_ip)
        flag = True
    else:
        print("Failed to get IP")
    return {'flag': flag, 'result': my_ip, 'message': message}
This function can accept a proxies dict if one is passed to it.
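For example, a call through a proxy looks like this (placeholder address, headers shortened; the dict is in the format that requests expects):

    proxy = {"http": "http://1.2.3.4:8080", "https": "http://1.2.3.4:8080"}
    check = check_my_ip(header={"User-Agent": "Mozilla/5.0"}, use_proxy=True, proxy_dict=proxy)
    print(check['result'])  # I expect the proxy's IP here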
I also have a function that gets a fake user agent:
def take_header():
    headers: dict = {}
    message = []
    flag = False
    try:
        user = fake_useragent.UserAgent().random
        headers = {'User-Agent': user}
        flag = True
        print("Fake agent received!\n" + str(headers))
    except Exception:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
        print("Header getting error!\n" + str(headers))
    return {'flag': flag, 'result': headers, 'message': message}
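It is used like this:

    header = take_header()['result']
    # e.g. {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...'}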
I get proxies from the well-known site https://www.us-proxy.org/ using a table-processing function I wrote. It filters the offered proxies, and I take only the anonymous ones.
def take_proxy(url: str = "",
               headers: dict = None,
               proxies: dict = None,
               take_http: bool = False,
               take_https: bool = False):
    proxy_dict: dict = {}
    message = []
    flag = False
    try:
        try:
            res = requests.get(url=url, headers=headers, proxies=proxies)
            print("Got table from page " + str(url))
        except Exception as exc:
            print("Error getting table from page " + str(url))
            print("Error text: " + str(exc))
            raise  # nothing to parse; let the outer handler report it
        soup = BeautifulSoup(res.text, "lxml")
        table_proxy_list = soup.find('table', class_="table table-striped table-bordered")
        proxy_list = []
        for row in table_proxy_list.tbody.find_all('tr'):
            columns = row.find_all('td')
            if columns:
                temp_proxy_items = proxy_items_us_proxy_org()
                temp_proxy_items.IP_Address = columns[0].text.strip()
                temp_proxy_items.Port = int(columns[1].text.strip())
                temp_proxy_items.Code = columns[2].text.strip()
                temp_proxy_items.Country = columns[3].text.strip()
                temp_proxy_items.Anonymity = columns[4].text.strip() in ("anonymous", "elite proxy")
                temp_proxy_items.Google = columns[5].text.strip() == "yes"
                temp_proxy_items.Https = columns[6].text.strip() == "yes"
                temp_proxy_items.Last_Checked = columns[7].text.strip()
                proxy_list.append(temp_proxy_items)
        table_head = [str(th.text).replace(" ", "_") for th in table_proxy_list.thead.find_all('th')]
        df_proxy_list = pd.DataFrame.from_records([t.__dict__ for t in proxy_list], columns=table_head)
        df_proxy_list['HTTP_S'] = np.where(df_proxy_list['Https'], "https", "http")
        df_proxy_list['IP_PORT'] = df_proxy_list.agg('{0[IP_Address]}:{0[Port]}'.format, axis=1)
        df_proxy_list_http = df_proxy_list.query('Https==False & Anonymity==True')['IP_PORT'].to_list()
        df_proxy_list_https = df_proxy_list.query('Https==True & Anonymity==True')['IP_PORT'].to_list()
        if take_http:
            proxy_dict["http"] = df_proxy_list_http
        if take_https:
            proxy_dict["https"] = df_proxy_list_https
        print("Proxy list received: \n" + str(proxy_dict))
        flag = True
    except Exception as exc:
        print("The proxy list is empty because of an error in proxy table processing")
        print("Error text: " + str(exc))
    return {'flag': flag, 'result': proxy_dict, 'message': message}
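On success the function returns a structure shaped like this (addresses invented for illustration):

    {'flag': True,
     'result': {'http': ['1.2.3.4:8080', '5.6.7.8:3128'],
                'https': ['9.10.11.12:80']},
     'message': []}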
I combine all these functions in this order:
- I determine my own IP;
- I run the function that processes the proxy site and get a list of proxies filtered by the anonymity condition;
- I loop over all the proxies, passing each one back into check_my_ip to make sure that the check sites report the proxy address in the response.
if __name__ == '__main__':
    my_ip = check_my_ip()
    print("My IP: " + my_ip['result'])
    header = take_header()['result']
    proxies = take_proxy(url="https://www.us-proxy.org/", headers=header, take_http=True, take_https=True)
    for item in proxies['result']:
        proxy_items = proxies['result'][item]
        for proxy_items_i in proxy_items:
            # both keys point at the same proxy so that http and https
            # requests are routed through it
            proxy_dict_to_send = {"http": proxy_items_i, "https": proxy_items_i}
            print("Used proxy: " + str(proxy_dict_to_send))
            result_check = check_my_ip(header=header, use_proxy=True, proxy_dict=proxy_dict_to_send)
            print(result_check)
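One detail worth noting about the loop above: requests selects the proxy entry whose key matches the scheme of the target URL, so for an https:// check site only the 'https' entry is consulted, and a dict with only an 'http' key means a direct connection. This can be observed with the internal helper requests.utils.select_proxy:

    from requests.utils import select_proxy

    print(select_proxy('https://ifconfig.me/', {'http': 'http://1.2.3.4:8080'}))   # None, i.e. direct
    print(select_proxy('https://ifconfig.me/', {'https': 'http://1.2.3.4:8080'}))  # 'http://1.2.3.4:8080'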
I have posted this much code because, when I asked about individual points, I only received answers that are already available online. Please take the time to look at my code; this really looks like a problem that shows up in the library and needs some kind of solution.
I have posted the full version of my code below, and I am attaching the file with the code for your convenience.
Please look into the problem and help me understand it. Maybe I am wrong (and so are three of my comrades), or maybe a real problem has been identified.
Can you suggest some temporary workarounds? Perhaps similar code can be implemented with other libraries, or in some other way entirely. I need to fetch information anonymously, with the proxy's address substituted for my own. Maybe there is another library for sending requests to sites; any options are welcome.
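If it helps with comparison, here is a rough standard-library equivalent of the same check (placeholder proxy address), so the behaviour can be compared outside of requests:

    import urllib.request

    handler = urllib.request.ProxyHandler({
        'http': 'http://1.2.3.4:8080',   # placeholder proxy
        'https': 'http://1.2.3.4:8080',
    })
    opener = urllib.request.build_opener(handler)
    print(opener.open('https://ifconfig.me/', timeout=10).read().decode())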
Full minimal version of the code:
import fake_useragent
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import urllib3

# the check sites are queried with verify=False, so silence the warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class proxy_items_us_proxy_org:
    def __init__(self, IP_Address: str = "",
                 Port: int = 0,
                 Code: str = "",
                 Country: str = "",
                 Anonymity: bool = False,
                 Google: bool = False,
                 Https: bool = False,
                 Last_Checked: str = ""):
        self.IP_Address = IP_Address
        self.Port = Port
        self.Code = Code
        self.Country = Country
        self.Anonymity = Anonymity
        self.Google = Google
        self.Https = Https
        self.Last_Checked = Last_Checked

    def __str__(self):
        return self.IP_Address + ":" \
            + str(self.Port) + "; " \
            + self.Code + "; " \
            + self.Country + "; " \
            + str(self.Anonymity) + "; " \
            + str(self.Google) + "; " \
            + str(self.Https) + "; " \
            + self.Last_Checked

    def __repr__(self):
        return self.__str__()

    def to_list(self):
        return [
            self.IP_Address,
            self.Port,
            self.Code,
            self.Country,
            self.Anonymity,
            self.Google,
            self.Https,
            self.Last_Checked
        ]
def check_my_ip(
        header=None,
        use_proxy: bool = False,
        proxy_dict=None):
    my_ip: str = ""
    message = []
    flag = False
    # only route through the proxy when the caller asks for it
    proxies = proxy_dict if use_proxy else None
    try:
        my_ip = requests.get(url='https://ifconfig.me/', headers=header, proxies=proxies, verify=False)
        my_ip = my_ip.text
        if len(my_ip) > 15: my_ip = ""  # longer than a bare IPv4 address, so not an IP
    except Exception:
        my_ip = ""
    if my_ip == "":
        try:
            my_ip = requests.get('https://ramziv.com/ip', headers=header, proxies=proxies, verify=False).text
            if len(my_ip) > 15: my_ip = ""
        except Exception:
            my_ip = ""
    if my_ip == "":
        try:
            s = requests.get('https://2ip.ua/ru/', headers=header, proxies=proxies, verify=False)
            b = BeautifulSoup(s.text, "html.parser")
            b = b.select(".ipblockgradient .ip")[0].getText()
            # re.search returns a Match object, not a string
            match = re.search(r"(\d{1,3}\.){3}\d{1,3}", b)
            my_ip = match.group(0) if match else ""
        except Exception:
            my_ip = ""
    if my_ip != "":
        print("Received IP: " + my_ip)
        flag = True
    else:
        print("Failed to get IP")
    return {'flag': flag, 'result': my_ip, 'message': message}
def take_proxy(url: str = "",
               headers: dict = None,
               proxies: dict = None,
               take_http: bool = False,
               take_https: bool = False):
    proxy_dict: dict = {}
    message = []
    flag = False
    try:
        try:
            res = requests.get(url=url, headers=headers, proxies=proxies)
            print("Got table from page " + str(url))
        except Exception as exc:
            print("Error getting table from page " + str(url))
            print("Error text: " + str(exc))
            raise  # nothing to parse; let the outer handler report it
        soup = BeautifulSoup(res.text, "lxml")
        table_proxy_list = soup.find('table', class_="table table-striped table-bordered")
        proxy_list = []
        for row in table_proxy_list.tbody.find_all('tr'):
            columns = row.find_all('td')
            if columns:
                temp_proxy_items = proxy_items_us_proxy_org()
                temp_proxy_items.IP_Address = columns[0].text.strip()
                temp_proxy_items.Port = int(columns[1].text.strip())
                temp_proxy_items.Code = columns[2].text.strip()
                temp_proxy_items.Country = columns[3].text.strip()
                temp_proxy_items.Anonymity = columns[4].text.strip() in ("anonymous", "elite proxy")
                temp_proxy_items.Google = columns[5].text.strip() == "yes"
                temp_proxy_items.Https = columns[6].text.strip() == "yes"
                temp_proxy_items.Last_Checked = columns[7].text.strip()
                proxy_list.append(temp_proxy_items)
        table_head = [str(th.text).replace(" ", "_") for th in table_proxy_list.thead.find_all('th')]
        df_proxy_list = pd.DataFrame.from_records([t.__dict__ for t in proxy_list], columns=table_head)
        df_proxy_list['HTTP_S'] = np.where(df_proxy_list['Https'], "https", "http")
        df_proxy_list['IP_PORT'] = df_proxy_list.agg('{0[IP_Address]}:{0[Port]}'.format, axis=1)
        df_proxy_list_http = df_proxy_list.query('Https==False & Anonymity==True')['IP_PORT'].to_list()
        df_proxy_list_https = df_proxy_list.query('Https==True & Anonymity==True')['IP_PORT'].to_list()
        if take_http:
            proxy_dict["http"] = df_proxy_list_http
        if take_https:
            proxy_dict["https"] = df_proxy_list_https
        print("Proxy list received: \n" + str(proxy_dict))
        flag = True
    except Exception as exc:
        print("The proxy list is empty because of an error in proxy table processing")
        print("Error text: " + str(exc))
    return {'flag': flag, 'result': proxy_dict, 'message': message}
if __name__ == '__main__':
    my_ip = check_my_ip()
    print("My IP: " + my_ip['result'])
    try:
        user = fake_useragent.UserAgent().random
        headers = {'User-Agent': user}
    except Exception:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    proxies = take_proxy(url="https://www.us-proxy.org/", headers=headers, take_http=True, take_https=True)
    for item in proxies['result']:
        proxy_items = proxies['result'][item]
        for proxy_items_i in proxy_items:
            # both keys point at the same proxy so that http and https
            # requests are routed through it
            proxy_dict_to_send = {"http": proxy_items_i, "https": proxy_items_i}
            print("Used proxy: " + str(proxy_dict_to_send))
            result_check = check_my_ip(header=headers, use_proxy=True, proxy_dict=proxy_dict_to_send)
            print(result_check)