0
from selenium import webdriver           
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import requests
from csv import writer


options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920x1080")
options.add_argument("--disable-extensions")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
wait = WebDriverWait(driver, 20) 

url='https://mergr.com/login'

driver.get(url)

email=driver.find_element(By.CSS_SELECTOR,"input#username")
email.send_keys("timgr8@outlook.com")

password=driver.find_element(By.CSS_SELECTOR,"input#password")
password.send_keys("Cosmos1990$$$$$$$")

login=driver.find_element(By.CSS_SELECTOR,"button.btn").click()
product=[]
for page in range(1,3):
        URL = 'https://mergr.com/firms/search/employees?page={page}&firm%5BactiveInvestor%5D=2&sortColumn=employee_weight&sortDirection=asc'.format(page=page)
        driver.get(URL)
        added_urls = []        
        
        soup = BeautifulSoup(driver.page_source,"lxml")
        details = soup.select("tbody tr")
        for detail in details:

                try:        
                        t1 = detail.select_one("h5.profile-title a").text
                except:
                        # pass # then you'll just be using the previous row's t1
                        # [also, if this happens in the first loop, it will raise an error]

                        t1 = 'MISSING' # '' #
                
                try:
                        t2=detail.select_one("h5.profile-title small").text
                except:
                        pass

                try:
                        t3=detail.select_one("h5.profile-title + p a").text
                except:
                        pass

                try:
                        t4=detail.select_one("a[title='Email']").text
                        
                except:
                        t4=' '

                try:
                        t5=detail.select_one("p[class='font-alt']").get_text(' ',strip=True)
                except:
                        pass
        
                wev = {
                        'Name':t1,
                        'Title/position':t2,
                        'Company name':t3,
                        'Email address':t4,
                        'Bio':t5
                        }
                product.append(wev)
                
df=pd.DataFrame(product)
df.to_csv('firm.csv')

The data are not coming sequence in csv file so how to resolve that issue I want that data come in sequence like that Name,Title/position,Company name and so but data do not come in sequence they come in no sequence as you above kindly why the data are not come in sequence in dataframe what wrong was that.....kindly recommend what I am doing wrong there

enter image description here

Amen Aziz
  • 153
  • 6
  • 3
    Take a look at this question, I think it might help. https://stackoverflow.com/questions/15653688/preserving-column-order-in-python-pandas-dataframe – Jakob Tinhofer Dec 14 '22 at 16:33

0 Answers0