I am writing a python program, because I am lazy, that checks a website for a job opening I have been told about and returns all the jobs the companies web page.
Here is my code so far (yes I know the code is jancky however I am just trying to get it working)
import requests
from bs4 import BeautifulSoup
import sys
import os
import hashlib
reload(sys)
sys.setdefaultencoding('utf8')
res = requests.get('WEBSITE URL', verify=False)
res.raise_for_status()
filename = "JobWebsite.txt"
def StartUp():
if not os.path.isfile(filename):
try:
jobfile = open(filename, 'a')
jobfile = open(filename, 'r+')
print("[*] Succesfully Created output file")
return jobfile
except:
print("[*] Error creating output file!")
sys.exit(0)
else:
try:
jobfile = open(filename, 'r+')
print("[*] Succesfully Opened output file")
return jobfile
except:
print("[*] Error opening output file!")
sys.exit(0)
def AnyChange(htmlFile):
fileCont = htmlFile.read()
FileHash = hasher(fileCont, "File Code Hashed")
WebHash = hasher(res.text, "Webpage Code Hashed")
!!!!! Here is the Problem
print ("[*] File hash is " + str(FileHash))
print ("[*] Website hash is " + str(WebHash))
if FileHash == WebHash:
print ("[*] Jobs being read from file!")
num_of_jobs(fileCont)
else:
print("[*] Jobs being read from website!")
num_of_jobs(res.text)
deleteContent(htmlFile)
writeWebContent(htmlFile, res.text)
def hasher(content, message):
content = hashlib.md5(content.encode('utf-8'))
return content
def num_of_jobs(htmlFile):
content = BeautifulSoup(htmlFile, "html.parser")
elems = content.select('.search-result-inner')
print("[*] There are " + str(len(elems)) + " jobs available!")
def deleteContent(htmlFile):
print("[*] Deleting Contents of local file! ")
htmlFile.seek(0)
htmlFile.truncate()
def writeWebContent(htmlFile, content):
htmlFile = open(filename, 'r+')
print("[*] Writing Contents of website to file! ")
htmlFile.write(content.encode('utf-8'))
jobfile = StartUp()
AnyChange(jobfile)
The problem I currently have is that I hash both of the websites html code and the files html code. However both of the hashes don't match, like ever, I am not sure and can only guess that it might be something with the contents being save in a file. The hashes aren't too far apart but it still causes the If statement to fail each time