I use a Python parser with Selenium and Beautiful Soup to iterate over pages of one social network. In short, the program collects a lot of links to user pages and then, in a loop, analyzes each page and outputs data (name, friends, subscriptions, etc.). At some point in the loop an error occurs and the next page is not loaded. Moreover, it seems to happen in a random place: I checked this by running it on the same data.
The loop code:
import time
from bs4 import BeautifulSoup as bs
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# "driver" and the initial "BSFile" (soup of the page with the list of users) are created earlier in the script
people = BSFile.find_all(attrs={"class": "fans_fan_lnk"})
for i in range(len(people)):
    print(people[i]["href"])
x = int(input())  # pause so I can look at the list before the loop starts
for j in range(len(people)):
    time.sleep(0.1)
    # this is the line the traceback points to (line 51 in my file)
    driver.get("https://vk.com" + str(people[j]['href']))
    time.sleep(0.1)
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "AvatarRich__background")))
    main_page = driver.page_source
    time.sleep(0.1)
    BSFile = bs(main_page, "html.parser")
    MasForUnpack = []
    if BSFile.find(attrs={"class": "ClosedProfileBlock-module__title--eb2WU vkuiTitle vkuiTitle--l-3 vkuiTitle--w-2"}) is not None:
        print("Профиль закрыт")  # profile is closed
        Numbers = BSFile.find_all(attrs={"class": "vkuiHeader__indicator vkuiCaption vkuiCaption--l-1"})
        TagOfName = BSFile.h2
        for st in TagOfName.stripped_strings:
            MasForUnpack.append(st)
        if len(MasForUnpack) == 2:
            # the second string is either a surname or the "last seen"/"online" status
            if (MasForUnpack[1].find('заходил') == -1) and (MasForUnpack[1].find('онлайн') == -1):
                print(MasForUnpack[0] + " " + MasForUnpack[1])
            else:
                print(MasForUnpack[0])
        if len(MasForUnpack) == 3:
            print(MasForUnpack[0] + " " + MasForUnpack[1])
        # Names = BSFile.find_all(attrs={"class": "vkuiHeader__content-in"})
        for i in range(len(Numbers)):
            print("Друзья: " + Numbers[i].string)  # friends counter
    else:
        print("Профиль открыт")  # profile is open
        Numbers = BSFile.find_all(attrs={"class": "vkuiHeader__indicator vkuiCaption vkuiCaption--l-1 vkuiCaption--w-1"})
        Names = BSFile.find_all(attrs={"class": "vkuiHeader__content-in"})
        TagOfName = BSFile.h2
        for st in TagOfName.stripped_strings:
            MasForUnpack.append(st)
        if len(MasForUnpack) == 2:
            if (MasForUnpack[1].find('заходил') == -1) and (MasForUnpack[1].find('онлайн') == -1):
                print(MasForUnpack[0] + " " + MasForUnpack[1])
            else:
                print(MasForUnpack[0])
        if len(MasForUnpack) == 3:
            print(MasForUnpack[0] + " " + MasForUnpack[1])
        for i in range(len(Numbers)):
            print(Names[i].string + ": " + Numbers[i].string)
    print()
Error message:
Traceback (most recent call last):
  File "G:\_prog_files\Html\proba\2.py", line 51, in <module>
    driver.get("https://vk.com"+str(people[j]['href']))
  File "G:\Python\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 441, in get
    self.execute(Command.GET, {'url': url})
  File "G:\Python\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 429, in execute
    self.error_handler.check_response(response)
  File "G:\Python\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 243, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: session deleted because of page crash
  from unknown error: cannot determine loading status
  from tab crashed
I tried switching the browser from Chrome to Edge, but I have not done anything else, because I do not even understand what the problem is.
I also used these options for Chrome:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
I did not find an answer in the earlier questions about this error, because I do not work on Linux and I do not fully understand all the points in those answers.
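The only workaround I can think of (I have not tried it yet) is to catch the WebDriverException around driver.get, re-create the driver, and retry the same page. This is only a rough sketch of the idea, assuming that re-creating the driver the same way as above is enough to continue the loop (make_driver is just a helper I would add):

from selenium.common.exceptions import WebDriverException

def make_driver():
    # hypothetical helper that repeats the Chrome setup from above
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

for j in range(len(people)):
    url = "https://vk.com" + str(people[j]['href'])
    try:
        driver.get(url)
    except WebDriverException:
        # after "tab crashed" the session is dead, so quit it and start a fresh one
        try:
            driver.quit()
        except WebDriverException:
            pass
        driver = make_driver()
        driver.get(url)
    # ... the rest of the loop body stays the same

But I would rather understand why the tab crashes in the first place than just restart the browser every time.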