0

I need to validate services and their dependencies using 500+ URLs, and I already have Python code that does it. The problem is that some of the URLs take a minute each to respond [because some known dependencies are down]. As each URL is hosted on a different server, is there a way to access multiple URLs at once using the requests module?

Below is the entire code I run in PyCharm:

import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def validate():
    """Check every service listed in the spreadsheet and report unexpected dependency failures.

    Reads ``C:\\mydata.xlsx`` (sheet ``Sheet1``) and, for each row, requests the
    URL in the ``Node`` column with HTTP basic auth.  The response body is
    parsed as JSON; when its ``Success`` flag is false the request is repeated
    once, and if it is still false every entry of ``Dependencies`` whose own
    ``Success`` flag is false is printed, unless that dependency is on the
    ignore list that applies to the service.

    URLs are checked one after another.  Any exception raised while checking
    a row is caught and reported so the remaining rows are still processed.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    # Loop-invariant lookup tables: built once rather than on every row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']

    def fetch_status(url):
        # TLS verification is disabled on purpose (the script also silences
        # the matching urllib3 warning at import time).
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def check(service, machine, url):
        data = fetch_status(url)
        if data['Success']:
            return
        # A failed status is re-queried once before it is reported.
        data = fetch_status(url)
        if data['Success']:
            return

        # Services in alwaysdownservice ignore the paydown dependencies;
        # every other service ignores only the otherdown ones.
        ignored = paydown if service in alwaysdownservice else otherdown
        for entry in data['Dependencies']:
            dependency = entry['DependencyName']
            if not entry['Success'] and dependency not in ignored:
                print(dependency, "down on", machine, "for", service)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]

        try:
            check(service, machine, url)
        except Exception as e:
            # Per-row boundary: one unreachable or malformed endpoint must
            # not abort the whole run.
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)



# Run the validation when the script is executed.
validate()

2 Answers

0

You can use threads (via Python's threading library) to call multiple URLs at once. To do that, you can use the following code:

import requests
import json
import pandas
import datetime
from requests.auth import HTTPBasicAuth
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import threading
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Thread objects started by validate(); joined at the end of the script.
threads = []

def validate():
    """Start one worker thread per spreadsheet row to check its service URL.

    Reads ``C:\\mydata.xlsx`` (sheet ``Sheet1``) and, for each row, starts a
    thread that requests the URL in the ``Node`` column with HTTP basic auth.
    The response body is parsed as JSON; when its ``Success`` flag is false
    the request is repeated once, and if it is still false every entry of
    ``Dependencies`` whose own ``Success`` flag is false is printed, unless
    that dependency is on the ignore list that applies to the service.

    Each started thread is appended to the module-level ``threads`` list; the
    caller is expected to join them.  This function returns as soon as all
    threads have been started, not when they have finished.
    """
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)
    username = 'username'
    password = 'password'
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')

    # Loop-invariant lookup tables: built once rather than on every row.
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']

    def fetch_status(url):
        # TLS verification is disabled on purpose (the script also silences
        # the matching urllib3 warning at import time).
        response = requests.get(url, verify=False, auth=HTTPBasicAuth(username, password))
        return json.loads(response.text)

    def get(srno, service, machine, url):
        # The row values arrive as arguments instead of being captured from
        # the enclosing loop: closures bind late, so a thread that runs after
        # the loop has advanced would otherwise read a later row's values.
        #
        # The try/except lives inside the worker because an exception raised
        # in a thread never propagates to the code that called start().
        try:
            data = fetch_status(url)
            if data['Success']:
                return
            # A failed status is re-queried once before it is reported.
            data = fetch_status(url)
            if data['Success']:
                return

            # Services in alwaysdownservice ignore the paydown dependencies;
            # every other service ignores only the otherdown ones.
            ignored = paydown if service in alwaysdownservice else otherdown
            for entry in data['Dependencies']:
                dependency = entry['DependencyName']
                if not entry['Success'] and dependency not in ignored:
                    print(dependency, "down on", machine, "for", service)
        except Exception as e:
            print(line, "\n", e, "\n", srno, "| Below URL is not accessible: \n", url, "\n" + line)

    for i in mydata.index:
        srno = str(mydata['Sr No'][i])
        service = mydata['Service Name'][i]
        machine = mydata['Machine Name'][i]
        url = mydata['Node'][i]

        t = threading.Thread(target=get, args=(srno, service, machine, url))
        t.start()
        threads.append(t)




# Start the worker threads (validate() appends each one to `threads`).
validate()

# Block until every worker thread has finished.
for thread in threads:
    thread.join()
abhigyanj
  • 2,355
  • 2
  • 9
  • 29
  • Do I need to specify a thread count somewhere, or something else? I ran the above code and it's still accessing 1 URL at a time. – messi.torres28 Nov 13 '20 at 09:26
  • Yes, I am sure. I just ran it and it's still accessing 1 URL at a time. Just to be clear, the only changes you have made are importing threading and changing the 'try' section of the code. – messi.torres28 Nov 13 '20 at 09:57
  • @messi.torres28 I realized my error in the code. I have fixed it now – abhigyanj Nov 13 '20 at 10:05
  • It still won't work. I tried accessing only the URLs that take more than 30 seconds each, and the time taken to access each URL is still the same. Please see the output below - ``` Validation started for: 13-November-2020 at 15:42:41 [21] Service1 *Machine* validated at: 15:43:25 | [22] Service1 *Machine* validated at: 15:44:10 ``` – messi.torres28 Nov 13 '20 at 10:14
  • Might be because a URL you are requesting is responding slow – abhigyanj Nov 13 '20 at 10:16
  • I agree but normally without the threading part, it still takes the same time to access the URL. – messi.torres28 Nov 13 '20 at 10:17
  • I think one URL is taking 30 secs and the others are done within a second so maybe there is not much of a time difference – abhigyanj Nov 13 '20 at 10:18
  • Unfortunately, your solution did not work... but I found [this](https://stackoverflow.com/questions/51726007/fetching-multiple-urls-with-aiohttp-in-python) from @Yurii Kramarenko. Which worked perfectly and now my script finishes its run in 30 seconds instead of 10-11 minutes. Now I am just learning/understanding the solution as I did not understand _why_ it works. – messi.torres28 Nov 20 '20 at 10:01
0

For people who need the solution: I found [this](https://stackoverflow.com/questions/51726007/fetching-multiple-urls-with-aiohttp-in-python) from @Yurii Kramarenko, which worked perfectly; my script now finishes its run in 30 seconds instead of 10-11 minutes.

My Script -

def validate():
    """Check all service URLs from the spreadsheet concurrently with aiohttp.

    Reads ``C:\\mydata.xlsx`` (sheet ``Sheet1``), requests every URL in the
    ``urls`` column concurrently with HTTP basic auth, and parses each
    response body as JSON.  When a response's ``Success`` flag is false,
    every entry of ``Dependencies`` whose own ``Success`` flag is false is
    printed, unless that dependency is on the ignore list that applies to the
    service.  A URL whose check raised is reported after all requests have
    completed, so one failure does not hide the others.

    The event loop is run unconditionally; the previous
    ``if __name__ == '__main__'`` guard inside the function body meant that
    calling ``validate()`` from an importing module fetched nothing.
    """
    alwaysdownservice = ['service1', 'service2']
    paydown = ['dependency1', 'dependency2', 'dependency3']
    otherdown = ['dependency3']
    username = 'username'
    password = 'password'
    mydata = pandas.read_excel(r'C:\mydata.xlsx', sheet_name='Sheet1')
    urls = mydata['urls']
    # NOTE(review): the ignore-list choice below depends on the service each
    # URL belongs to.  This assumes the sheet has a 'Service Name' column
    # aligned row-for-row with 'urls' - confirm against the real workbook.
    services = mydata['Service Name']
    line = "---------------------------------------------------------------------------------------------------"
    print("Validation started for:", datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line)

    async def fetch(session, service, url):
        # ssl=False disables certificate verification for this request.
        async with session.get(url, auth=aiohttp.BasicAuth(username, password), ssl=False) as response:
            data = json.loads(await response.text())

        if not data['Success']:
            # Services in alwaysdownservice ignore the paydown dependencies;
            # every other service ignores only the otherdown ones.
            ignored = paydown if service in alwaysdownservice else otherdown
            for entry in data['Dependencies']:
                dependency = entry['DependencyName']
                if not entry['Success'] and dependency not in ignored:
                    print("Dependency -",
                          "\'" + dependency + "\'", "down on", "\nURL -", url, "\n" + line)

        print(url, "validated at:", datetime.datetime.now().strftime("%H:%M:%S"))

    async def fetch_all(targets):
        async with aiohttp.ClientSession() as session:
            # return_exceptions=True keeps one failing URL from cancelling
            # the rest; the exceptions are handed back for reporting.
            return await asyncio.gather(
                *(fetch(session, service, url) for service, url in targets),
                return_exceptions=True,
            )

    targets = list(zip(services, urls))
    results = asyncio.run(fetch_all(targets))

    # Surface the failures that gather() collected instead of dropping them.
    for (_, url), result in zip(targets, results):
        if isinstance(result, BaseException):
            print(line, "\n", result, "\n", "Below URL is not accessible: \n", url, "\n" + line)

    print("Validation completed for:",
          datetime.datetime.now().strftime("%d-%B-%Y at %H:%M:%S"), "\n" + line, "\n" + line,)


# Run the validation when the script is executed.
validate()