from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import requests
from csv import writer
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20)
url='https://mergr.com/login'
driver.get(url)
email=driver.find_element(By.CSS_SELECTOR,"input#username")
email.send_keys("timgr8@outlook.com")
password=driver.find_element(By.CSS_SELECTOR,"input#password")
password.send_keys("Cosmos1990$$$$$$$")
login=driver.find_element(By.CSS_SELECTOR,"button.btn").click()
product=[]
for page in range(1,3):
URL = 'https://mergr.com/firms/search/employees?page={page}&firm%5BactiveInvestor%5D=2&sortColumn=employee_weight&sortDirection=asc'.format(page=page)
driver.get(URL)
added_urls = []
soup = BeautifulSoup(driver.page_source,"lxml")
details = soup.select("tbody tr")
for detail in details:
try:
t1 = detail.select_one("h5.profile-title a").text
except:
# pass # then you'll just be using the previous row's t1
# [also, if this happens in the first loop, it will raise an error]
t1 = 'MISSING' # '' #
try:
t2=detail.select_one("h5.profile-title small").text
except:
pass
try:
t3=detail.select_one("h5.profile-title + p a").text
except:
pass
try:
t4=detail.select_one("a[title='Email']").text
except:
t4=' '
try:
t5=detail.select_one("p[class='font-alt']").get_text(' ',strip=True)
except:
pass
wev = {
'Name':t1,
'Title/position':t2,
'Company name':t3,
'Email address':t4,
'Bio':t5
}
product.append(wev)
df=pd.DataFrame(product)
df.to_csv('firm.csv')
The data are not coming sequence in csv file
so how to resolve that issue I want that data come in sequence like that Name
,Title/position
,Company name
and so but data do not come in sequence they come in no sequence as you above kindly why the data are not come in sequence in dataframe what wrong was that.....kindly recommend what I am doing wrong there