1

Wroks just for the first iteration. I need copy link of the posts. It gives me link for the first iteration just. It is Linkedin Posts link scraper. I want to get copy link of the posts for further scraping and all. Kindly see the issue. I doesn't click the button on the second post but does on the first post just.

from selenium import webdriver
import sys
import os
from selenium.webdriver.common.keys import Keys
import time
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import clipboard

from selenium.webdriver.support import expected_conditions
'''if len(sys.argv) > 2:
    username = sys.argv[1]
    try:
        maxcount = int(sys.argv[2])
    except:
        sys.ext("Infinite scrolling limit must be an integer")
else:
    sys.exit("Username and/or infinite scrolling limit not specified.")'''

username=''

driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://www.linkedin.com')

pause = input("Press Enter once login is complete...")

driver.get('https://www.linkedin.com/search/results/content/keywords=apple&origin=QUERY_SUGGESTION')
maxcount=1
count = 0
while (count < maxcount):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)
    count = count + 1

print("Looking for hidden ariadivs")

ariadivs = []
ariadivs = driver.find_elements_by_xpath("//div[@class='feed-shared-update-v2__control-menu absolute text-align-right feed-shared-control-menu ember-view']")

print("Found this many ariasdivs:")
print(len(ariadivs))

print("Unhiding and clicking on arias")
'''soup=BeautifulSoup(driver.page_source,features='lxml')
print(soup.prettify())'''
count=0
for idx,ariadiv in enumerate(ariadivs):
    driver.execute_script("arguments[0].scrollIntoView(true);", ariadiv)
    driver.execute_script("window.scrollBy(0, -90);")
    time.sleep(0.4)
    time.sleep(5)
    ariabutton = ariadiv.find_element_by_xpath("//button[@class='feed-shared-control-menu__trigger artdeco-button artdeco-button--tertiary artdeco-button--muted artdeco-button--1 artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view']")
    #ariabutton=ariadiv.find_element_by_class_name('feed-shared-control-menu__trigger artdeco-button artdeco-button--tertiary artdeco-button--muted artdeco-button--1 artdeco-button--circle artdeco-dropdown__trigger artdeco-dropdown__trigger--placement-bottom ember-view')
    ariabutton.click()
    
    time.sleep(5)
    soup=BeautifulSoup(ariadivs[1].get_attribute('innerHTML'),features='lxml')
    print(soup.prettify())
    listitem = ariadiv.find_element_by_xpath("//li[@class='feed-shared-control-menu__item option-share-via']").click()
    
    print(clipboard.paste())
    '''link = listitem.find_element_by_css_selector("span[class='feed-shared-control-menu__headline t-14 t-black t-bold']")
    try:
        driver.execute_script("arguments[0].click();", link)
    except Exception as e:
        print(e)'''
    time.sleep(10)
    count+=1

print("Recovering the links")

saves = []
saves = driver.find_elements_by_class_name('artdeco-toast-item__cta')

print("Found this many save links:")
print(len(saves))

for save in saves:
    print(save.get_attribute("href"))

'''f = open("post-links.csv","w+")
for save in saves:
    f.write(save.get_attribute("href") + "\n") 
f.close()'''

0 Answers0