my dataframe df contains over 600 urls and i want to get specific value from an Element. this code works fine for that:
ownerlist = []
for links in tqdm (df['Link'], leave=False, position=0):
ownersite = s.get(links, cookies=cookies)
owsoup = BeautifulSoup(ownersite.content, 'lxml')
owner = owsoup.find('input', {'id': 'GlobalBodyContent_InternalBodyContent_BodyContent_Owner'}).get('value')
ownerlist.append(owner)
#print(len(ownerlist),owner)
df['Owner'] = ownerlist
print(df)
but it takes up to 40 minutes to get all requests done. i tried a multithread approach but i am not able to get it to work. it runs faster but insted off 600+ items i have only 2or 3 in my List after. I tried:
owner = []
def mt(links):
ap = s.get(links, cookies=cookies)
apsoup = BeautifulSoup(ap.content, 'lxml')
ap1 = apsoup.find('input', {'id': 'GlobalBodyContent_InternalBodyContent_BodyContent_Owner'}).get('value')
#print(ap1)
owner.append(ap1)
def main():
for links in tqdm(df['Link']):
threadProcess = threading.Thread(name='simplethread', target=mt, args=[links])
threadProcess.daemon = True
threadProcess.start()
main()
How can i run this loop faster than 40 minutes ? Thanks !