6
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time

def checkLinkedIn(command):
    """Report whether a public LinkedIn profile page exists for a handle.

    Loads ``https://www.linkedin.com/in/<command>`` in headless Chrome and
    searches the rendered HTML for an ``<h1>`` whose class is
    ``top-card-layout__title``. The outcome is printed; nothing is returned.

    Args:
        command: Handle appended to the LinkedIn ``/in/`` URL.
    """
    url = f"https://www.linkedin.com/in/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences, and Python
    # warns about them in ordinary string literals. The value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Pause before reading the DOM so the page has a moment to render;
        # sleeping after page_source was captured had no effect on the result.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if navigation or parsing fails,
        # so no orphaned Chrome/chromedriver process is left behind.
        driver.quit()
    name = soup.find("h1", attrs={"class": "top-card-layout__title"})
    if name:
        print("LinkedIn profile found")
        print(url)
    else:
        print("No LinkedIn profile found")

def checkTwitter(command):
    """Report whether a Twitter profile page exists for a handle.

    Loads ``https://www.twitter.com/<command>`` in headless Chrome, prints the
    text content of the rendered page, and then searches the HTML for a
    ``<div dir="ltr">`` element. The outcome is printed; nothing is returned.

    Args:
        command: Handle appended to the Twitter URL.
    """
    url = f"https://www.twitter.com/{command}"
    # Raw string: "\P" and "\c" are not valid escape sequences, and Python
    # warns about them in ordinary string literals. The value is unchanged.
    path = r"C:\Program Files (x86)\chromedriver.exe"
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(path, options=options)
    try:
        driver.get(url)
        # Pause before reading the DOM so client-side rendering has a moment
        # to run; sleeping after page_source was captured had no effect.
        time.sleep(2)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if navigation or parsing fails,
        # so no orphaned Chrome/chromedriver process is left behind.
        driver.quit()
    at_tag = soup.find("div", attrs={"dir": "ltr"})
    # Diagnostic dump of the page text (shows what the site actually served).
    print(soup.text)
    if at_tag:
        print("Twitter profile found")
        print(url)
    else:
        print("No Twitter profile found")

# Prompt on stdin for the handle to look up.
usrname = input("--> ")

# Only the Twitter check is invoked here; checkLinkedIn is defined above but not called.
checkTwitter(usrname)

The LinkedIn function works. However, the Twitter one comes up with this:

JavaScript is not available. We’ve detected that JavaScript is disabled in this browser. Please enable JavaScript or switch to a supported browser to continue using twitter.com. You can see a list of supported browsers in our Help Centre.

How do I enable JavaScript in headless Chrome? Thanks in advance.

Tom
  • 440
  • 3
  • 10

2 Answers

17

This may be because the website detects that it's a headless browser and disables some features.

To get around it you can spoof (as much as possible) the identity of the headless browser to trick the website.

Try the following options:

from fake_useragent import UserAgent

# Build Chrome options that hide the usual signs of an automated, headless
# browser. Assumes `webdriver` is already imported from selenium.
options = webdriver.ChromeOptions()

options.add_argument('--headless')
options.add_argument("--incognito")
options.add_argument("--nogpu")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1280,1280")
options.add_argument("--no-sandbox")
# NOTE(review): JavaScript is normally enabled by default in Chrome; confirm
# that this switch has any effect before relying on it.
options.add_argument("--enable-javascript")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')

# Pick a random real-world user-agent string to replace the headless default.
ua = UserAgent()
userAgent = ua.random

driver = webdriver.Chrome(options=options)
# Register the navigator.webdriver patch through CDP so it is injected into
# every new document before the page's own scripts run. A one-off
# execute_script call only patches the document loaded at that moment and is
# lost as soon as driver.get() navigates elsewhere.
driver.execute_cdp_cmd(
    'Page.addScriptToEvaluateOnNewDocument',
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)
driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": userAgent})

This worked for me with a particularly stubborn website. I gathered the options from many SO answers, particularly this one: https://stackoverflow.com/a/53040904/5339857

Roy Shilkrot
  • 3,079
  • 29
  • 25
1

use

# Adds the switch to the Chrome launch arguments.
# NOTE(review): JavaScript is normally on by default in Chrome; confirm this switch changes anything.
options.add_argument("--enable-javascript")
cruisepandey
  • 28,520
  • 6
  • 20
  • 38