I made this script its for checking if the github profiles contain email and sort them i 2 separate list, it not finished need to optimize and get rid of extra stuff.
The main problem is that i was running with terminal using python, and that 2.7 by my system, when I wanted to run it with python 3 , I got errors, I tried debugging them but still couldn't fix the problem.
Here is my code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from timeit import default_timer as timer
start = timer()
timeout = 1
haveEmail = []
noEmail = []
options = webdriver.ChromeOptions()
options.add_argument("headless")
#YOU NEED TO PROVIDE THE LOCATION OF YOU CHROME DRIVER
browser = webdriver.Chrome('/home/djurovic/Desktop/Linux ChromeDriver/chromedriver', chrome_options=options)
def login(email):
browser.get(email)
signInXpath = '//a[@class="HeaderMenu-link no-underline mr-3"]'
signInElement = WebDriverWait(browser, timeout).until(lambda browser: browser.find_element_by_xpath(signInXpath))
signInElement.click()
#ENTER YOUR LOGIN INFORMATION
username = ''
password = ''
userNameXpath = '//input[@class="form-control input-block"]'
passwordXpath = '//input[@class="form-control form-control input-block"]'
loginButtonXpath = '//input[@class="btn btn-primary btn-block"]'
userNameElement = WebDriverWait(browser, timeout).until(lambda browser: browser.find_element_by_xpath(userNameXpath))
passwordElement = WebDriverWait(browser, timeout).until(lambda browser: browser.find_element_by_xpath(passwordXpath))
userNameElement.clear()
userNameElement.send_keys(username)
passwordElement.clear()
passwordElement.send_keys(password)
loginButtonElement = WebDriverWait(browser, timeout).until(lambda browser: browser.find_element_by_xpath(loginButtonXpath))
loginButtonElement.click()
def checkEmail(profile):
browser.get(profile)
emailXPath = '//a[@class="u-email"]'
try:
emailElement = WebDriverWait(browser, timeout).until(lambda browser: browser.find_element_by_xpath(emailXPath))
haveEmail.append(profile)
except:
noEmail.append(profile)
def parse():
i = 1
document = open('profiles.txt', 'rb')
for profile in document:
if i == 1:
login(profile)
i = i + 1
checkEmail(profile)
browser.close()
parse()
print(haveEmail)
print(noEmail)
for item in haveEmail:
output_file = open("profiles_with_email.txt", 'a')
for i in str(len(haveEmail)):
output_file.write(item + "\n")
output_file.close()
for item in noEmail:
output_file = open("profiles_with_no_email.txt", 'a')
for i in str(len(noEmail)):
output_file.write(item + "\n")
output_file.close()
elapsed_time = timer() - start
print("Script finished in " + str(elapsed_time))
Here are the errors that I get when run it through python 3 :
Traceback (most recent call last):
File "github.py", line 77, in <module>
parse()
File "github.py", line 72, in parse
login(profile)
File "github.py", line 33, in login
browser.get(email)
File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/webdriver.py", line 333, in get
self.execute(Command.GET, {'url': url})
File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/webdriver.py", line 319, in execute
response = self.command_executor.execute(driver_command, params)
File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/remote_connection.py", line 372, in execute
data = utils.dump_json(params)
File "/usr/local/lib/python3.6/dist-packages/selenium/webdriver/remote/utils.py", line 33, in dump_json
return json.dumps(json_struct)
File "/usr/lib/python3.6/json/__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "/usr/lib/python3.6/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/usr/lib/python3.6/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/usr/lib/python3.6/json/encoder.py", line 180, in default
o.__class__.__name__)
TypeError: Object of type 'bytes' is not JSON serializable
Any kind of help would be a lot of help!!!