I need to extract the phone number and website link, along with the name and country, of each university listed on a website. The website is https://www.whed.net/results_institutions.php?Chp2=Business%20Administration. The problem is that there is a + sign
that has to be clicked for every university before its data can be extracted; the pop-up then has to be closed before moving on to the next university.
I have tried several approaches with Selenium, such as the following:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
import time
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the name, country and contact details of every university on the
# WHED result list. The contact details are only shown in a Fancybox pop-up,
# so each entry's pop-up is opened, read and closed before the next entry.

# Open the web browser.
browser = webdriver.Chrome('C:\\Users\\albert.malhotra\\Desktop\\Web Scrapings\\Kentucky State\\chromedriver')
# Result-list page to scrape.
url = 'https://www.whed.net/results_institutions.php?Chp2=Business%20Administration'
browser.get(url)
# Explicit wait (up to 10 seconds) reused for every "has it appeared yet?" check.
wait = WebDriverWait(browser, 10)

# The close button carries two classes. A class-name locator accepts only a
# single class, so the pair has to be expressed as a CSS selector.
CLOSE_BUTTON = (By.CSS_SELECTOR, '.fancybox-item.fancybox-close')

dfs = []       # country of each scraped university
dfss = []      # name of each scraped university
contacts = []  # span.contenu texts from the pop-up, one list per university

for n in range(50):
    # NOTE(review): nothing in this section advances to the next result page,
    # so each pass re-reads the current page unless later code navigates.
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.details')))
    except TimeoutException:
        # No result entries were rendered; there is nothing left to scrape.
        break

    soup = BeautifulSoup(browser.page_source, 'lxml')
    countries = soup.find_all('p', {'class': 'country'})
    entries = soup.find_all('div', {'class': 'details'})
    # Assumes the page shows one 'More details' link per entry, in the same
    # order as the entries -- TODO confirm against the live markup.
    links = browser.find_elements(By.LINK_TEXT, 'More details')

    for index, (entry, link) in enumerate(zip(entries, links)):
        headings = entry.select('h3')
        university = headings[0].text if headings else ''

        # Prefer a country tag inside the entry itself; otherwise fall back to
        # the page-wide list by position so every row gets its own country
        # instead of the last one on the page.
        country_tag = entry.find('p', {'class': 'country'})
        if country_tag is None and index < len(countries):
            country_tag = countries[index]
        country = country_tag.text if country_tag is not None else ''

        # Open this entry's pop-up (not just the first link on the page) and
        # wait until it is actually displayed before reading anything.
        link.click()
        wait.until(EC.element_to_be_clickable(CLOSE_BUTTON))

        # Fancybox may render the details inside an iframe; if it did, the
        # content is only reachable after switching into that frame.
        frames = browser.find_elements(By.CSS_SELECTOR, 'iframe.fancybox-iframe')
        if frames:
            browser.switch_to.frame(frames[0])
        try:
            try:
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'span.contenu')))
            except TimeoutException:
                # The pop-up showed no span.contenu in time: keep the row with
                # empty details rather than aborting the whole run.
                details = []
            else:
                popup = BeautifulSoup(browser.page_source, 'lxml')
                details = [span.text for span in popup.find_all('span', {'class': 'contenu'})]
        finally:
            # Always return to the top-level document (harmless when no frame
            # was entered) so the close button can be located.
            browser.switch_to.default_content()

        browser.find_element(*CLOSE_BUTTON).click()
        # Wait for the pop-up to disappear so it cannot intercept the next click.
        wait.until(EC.invisibility_of_element_located(CLOSE_BUTTON))

        dfss.append(university)
        dfs.append(country)
        contacts.append(details)