1

This is the code:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support import expected_conditions as EC
import time
import sys


login_url = 'https://www.researchgate.net/login'
base_url = "https://www.researchgate.net/institution/Islamia_College_Peshawar/department/Department_of_Computer_Science/members"
chrome_driver_path = 'home/danish-khan/scrapers/researchgate/chromedriver'

chrome_options = Options()
#chrome_options.add_argument('--headless')

webdriver = webdriver.Chrome(
  executable_path=chrome_driver_path, options=chrome_options
)

# default login credential and search query
username = 'your username'
password = 'your password'

with webdriver as driver:
    # Set timeout time 
    wait = WebDriverWait(driver, 2)

    # retrive url in headless browser
    driver.get(login_url)
    
    driver.find_element_by_id("input-login").send_keys(username)
    driver.find_element_by_id("input-password").send_keys(password)
    driver.find_element_by_class_name("nova-c-button__label").find_element(By.XPATH, "./..").click()
    time.sleep(2)

    driver.get(base_url)

    time.sleep(10)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(20)
    names = driver.find_elements_by_css_selector('.display-name')
    print('total names:',len(names))
  
time.sleep(10)

driver.close()

this is the output

total names: 20 Traceback (most recent call last): File "/home/danish-khan/scrapers/scrpers/lib/python3.8/site-packages/urllib3/connection.py", line 159, in _new_conn conn = connection.create_connection( File "/home/danish-khan/scrapers/scrpers/lib/python3.8/site-packages/urllib3/util/connection.py", line 84, in create_connection raise err File "/home/danish-khan/scrapers/scrpers/lib/python3.8/site-packages/urllib3/util/connection.py", line 74, in create_connection sock.connect(sa) ConnectionRefusedError: [Errno 111] Connection refused

I am trying to find out why it does not return all the elements because on the website there are more than 30 names/profiles but it only shows 20 names. I apply the wait until the element is found strategy but cannot work

any solution for it?

derloopkat
  • 6,232
  • 16
  • 38
  • 45
Danish Khan
  • 53
  • 10
  • Shouldn't you add the delay between calling scrollTo and find_elements? – Martheen Feb 19 '21 at 09:50
  • I added the sleep/delay but cann't work – Danish Khan Feb 19 '21 at 10:20
  • Sometimes the script will load only those parts that are visible to the user. You have to scroll through the page part by part so that when you reach the end, the whole webpage has loaded completely. [This](https://stackoverflow.com/a/65967148/14759065) might give you the answer. – Ananth Feb 19 '21 at 11:32
  • What happens if you delete the driver.close() function? –  Feb 19 '21 at 14:55

0 Answers0