I am having issues with my code when trying to run tasks in parallel using Python's multiprocessing library.
Here is my code. I have a function called extract_tag_data:
def extract_tag_data(tag):
    """Search for ``tag`` and record the first listed article, twice over.

    Types ``tag`` into the module-level ``search_bar`` and submits it with
    RETURN.  Then, for two iterations: collects the article links from the
    results table, opens only the first one, stores its title, abstract,
    authors and structures in the module-level ``dict``, navigates back, and
    clicks the "step-forward" control in the table footer (presumably the
    next-page button -- confirm against the site).

    Relies on ``driver``, ``search_bar`` and ``dict`` being defined at module
    level; returns nothing.
    """
    links_xpath = "//table[@class='table table-hover']/tbody/tr/td[2]/div[@class='media']/div[@class='media-body']/strong/a"
    forward_xpath = '//table[@class="table table-hover"]/tfoot/tr[1]/th[2]/ul/li/a/span[@class="glyphicon glyphicon-step-forward"]'

    def single_text(xpath):
        # ASCII-folded text of the one element matching ``xpath``.
        return unidecode.unidecode(driver.find_element(By.XPATH, xpath).text)

    def joined_texts(xpath):
        # ASCII-folded, comma-joined text of every element matching ``xpath``.
        matches = driver.find_elements(By.XPATH, xpath)
        return unidecode.unidecode(",".join([node.text for node in matches]))

    search_bar.send_keys(tag)
    search_bar.send_keys(Keys.RETURN)

    for _ in range(2):
        links = driver.find_elements(By.XPATH, links_xpath)
        # Only the first link is visited; the slice keeps an empty result
        # list from raising.
        for link in links[:1]:
            link.click()
            # NOTE(review): this overwrites 'tag' instead of appending like
            # the other columns, so only the most recent tag is kept --
            # confirm that is intended.
            dict['tag'] = tag
            dict['article_title'].append(single_text('//h1[@class="title"]'))
            dict['abstract'].append(single_text('//div[@class="abstract"]/div[1]'))
            dict['authors'].append(joined_texts('//div[@class="authors"]/span'))
            dict['structs'].append(joined_texts('//div[@class="authors"]/div[@class="structs"]/div[@class="struct"]/a'))
            driver.back()
        driver.find_element(By.XPATH, forward_xpath).click()
I want to run this function over a list of tags in parallel:
if __name__ == '__main__':
    # Everything below must stay under this guard: with the 'spawn' start
    # method the worker processes import the main module, and any
    # process-starting code left at module level triggers the
    # "bootstrapping phase" RuntimeError.
    try:
        with multiprocessing.get_context('spawn').Pool(3) as pool:
            # Run extract_tag_data once per tag across 3 worker processes;
            # map() blocks until every tag has been processed.
            pool.map(extract_tag_data, tags)
            pool.close()
    finally:
        # Quit the browser exactly once, even if a worker raised.
        driver.quit()

    # NOTE(review): spawned workers do not share this process's globals, so
    # anything extract_tag_data stores in `dict` inside a worker is probably
    # not visible here -- confirm, and consider returning the rows from the
    # worker function instead.
    df = pd.DataFrame(dict, columns=['article_title', 'authors', 'abstract', 'structs', 'tag'])
    # NOTE(review): the path is a raw string, so each "\\" stays a doubled
    # backslash; kept unchanged here -- verify it resolves as intended.
    df.to_excel(r"C:\\Users\\dell\\Desktop\\data collection\\myDataset.xlsx", sheet_name='Sheet1')
but I am getting the following error:
File "C:\Users\dell\miniconda3\lib\multiprocessing\spawn.py", line 134, in _check_not_importing_main
raise RuntimeError('''
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
[Done] exited with code=1 in 77.947 seconds