I am trying to write a program to scrape website content. The script seems to run for a while, but it stops after a couple of iterations with the following traceback:
Traceback (most recent call last):
  File "D:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\multiprocessing\util.py", line 300, in _run_finalizers
    finalizer()
  File "D:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\multiprocessing\util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "D:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\multiprocessing\pool.py", line 581, in _terminate_pool
    cls._help_stuff_finish(inqueue, task_handler, len(pool))
  File "D:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\multiprocessing\pool.py", line 568, in _help_stuff_finish
    inqueue._reader.recv()
  File "D:\Program Files (x86)\Microsoft Visual Studio\Shared\Python37_64\lib\multiprocessing\connection.py", line 251, in recv
    return _ForkingPickler.loads(buf.getbuffer())
AttributeError: Can't get attribute 'InsertNews' on <module '__main__' from 'c:\\program files (x86)\\microsoft visual studio\\2019\\common7\\ide\\extensions\\microsoft\\python\\core\\debugpy\\__main__.py'>
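From the last line of the traceback, my understanding (which may be wrong) is that on Windows multiprocessing starts workers with the spawn method, which re-imports __main__ in each worker process and looks the task function up there by name. Because I am running under the Visual Studio debugger, __main__ is debugpy's own __main__.py, so InsertNews cannot be found there. A minimal sketch of the rule I mean (the function name work and the sample numbers are mine, not from my script):

import multiprocessing as mp

def work(x):
    # must be defined at module top level: spawn re-imports this
    # module in every worker and looks work up by name
    return x * 2

if __name__ == "__main__":
    with mp.Pool(2) as pool:
        print(pool.map(work, [1, 2, 3]))  # prints [2, 4, 6]

As far as I can tell, moving work inside the if __name__ == "__main__": block reproduces the same kind of AttributeError on Windows, which seems to match what I am seeing.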
Here is the script that I am trying to run:
from boilerpy3 import extractors
import pymongo
import multiprocessing as mp

def InsertNews(newsite, symbol):
    print(symbol)
    print(newsite)
    extractor = extractors.ArticleExtractor()
    try:
        content = extractor.get_content_from_url(newsite)
    except Exception:
        return  # skip this site; otherwise content would be undefined below
    print(content)
    # insert_one expects a dict; {symbol, content} was a set literal
    record = {"symbol": symbol, "content": content}
    mydb["StocksPressRelease"].insert_one(record)

if __name__ == "__main__":
    print("started")
    pool = mp.Pool(mp.cpu_count())
    myclient = pymongo.MongoClient("mongodb+srv://un:pwd@cluster0.subkd.azure.mongodb.net/db?retryWrites=true&w=majority&connectTimeoutMS=900000")
    mydb = myclient["db"]
    mycol = mydb["Stocks"]
    for x in mycol.find({}, {"_id": 0, "symbol": 1, "newsite": 1}):
        results = pool.apply_async(InsertNews, args=(x["newsite"], x["symbol"]))
    pool.close()
    pool.join()  # wait for the workers instead of letting the pool be finalized mid-run
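In case it helps, here is a sketch of how I am thinking of restructuring it, with the worker function moved into a module the spawned processes can import and one MongoClient per worker process (the file name scrape_worker.py, the init_worker initializer, and the use of starmap are my own guesses, not tested):

# scrape_worker.py (hypothetical module name), importable by spawned workers
from boilerpy3 import extractors
import pymongo

mydb = None  # set once per worker process by init_worker

def init_worker(uri):
    # each worker opens its own client; pymongo clients should not be
    # shared across processes, so create one after the worker starts
    global mydb
    mydb = pymongo.MongoClient(uri)["db"]

def InsertNews(newsite, symbol):
    extractor = extractors.ArticleExtractor()
    try:
        content = extractor.get_content_from_url(newsite)
    except Exception:
        return  # skip sites that fail to extract
    mydb["StocksPressRelease"].insert_one({"symbol": symbol, "content": content})

# main.py
import multiprocessing as mp
import pymongo
from scrape_worker import InsertNews, init_worker

URI = "mongodb+srv://un:pwd@cluster0.subkd.azure.mongodb.net/db?retryWrites=true&w=majority&connectTimeoutMS=900000"

if __name__ == "__main__":
    mycol = pymongo.MongoClient(URI)["db"]["Stocks"]
    rows = list(mycol.find({}, {"_id": 0, "symbol": 1, "newsite": 1}))
    with mp.Pool(mp.cpu_count(), initializer=init_worker, initargs=(URI,)) as pool:
        pool.starmap(InsertNews, [(x["newsite"], x["symbol"]) for x in rows])

Would something like this avoid the error, or am I misreading the traceback?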