I am looking to extract the output from some gcloud commands which take some time to execute. When called independently with the subprocess.check_output(cmd, shell=True) method it works. However I have tried to speed up this process by using threads. I do not know / understand why the threads get stuck when calling this method.
from threading import Thread, Lock
from queue import Queue
import subprocess
import json
class WorkerThread(Thread):
def __init__(self, thread_id, queue):
super().__init__()
self.queue = queue
self.thread_id = thread_id
def run(self):
print(f'Started thread {self.thread_id}')
while True:
try:
project = self.queue.get(timeout=1)
command = f'gcloud iam service-accounts --project={project} list --format="flattened(email)" | awk \'{{ print $2 }}\' | grep -v ^$'
sa_list = subprocess.check_output(command, shell=True).decode('utf-8').splitlines()
print ('THIS DOES NOT GET PRINTED')
if sa_list:
with lock:
data[project] = []
for sa in sa_list:
data[project].append({'email': sa})
self.queue.task_done()
except:
return
data = {}
q = Queue()
threads = []
lock = Lock()
command = 'gcloud projects list --format="flattened(projectId)" | awk \'{ print $2 }\' | grep -v ^$'
projects = subprocess.check_output(command, shell=True).decode('utf-8').splitlines()
for project in projects:
q.put(project)
for i in range(10):
t = WorkerThread(i, q)
t.daemon = True
t.start()
threads.append(t)
q.join()
for t in threads:
t.join()
with open('results_threading', 'w') as f:
json.dump(data, f, indent=2)
I have tried to execute other code inside the thread (instead of subprocess.check_output) and the program seems to run concurrently.
Also found this old post related to the same issue. Unfortunatly, the author seemed to have found a solution but did not provide any details.
Appreciate any suggestions.Thank You !