I have a crawler that extracts links from a page only if the link text includes a given text, and I'm writing the output to an HTML file. It's working, but I would like to add the whole link text next to each of these links, like this: "Junior Java developer - https://www.jobs.cz/junior-developer/". How can I do this?
Thanks
import html
import re

import requests
from bs4 import BeautifulSoup
def jobs_crawler(max_pages):
    """Crawl jobs.cz result pages and save the matching job links as HTML.

    Fetches result pages ``1 .. max_pages - 1`` of a fixed jobs.cz search,
    keeps every title link whose text contains "IT" (case-insensitive) and
    writes one ``<a href="URL">TITLE</a><br />`` line per match to
    ``links.html`` in the current working directory. Each match is also
    printed to stdout as ``TITLE - URL``.

    Args:
        max_pages: Exclusive upper bound of the page numbers to fetch
            (``jobs_crawler(5)`` fetches pages 1 to 4). With a value of 1
            or less nothing is fetched and the output file is left empty.
    """
    file_name = 'links.html'
    base_url = ('https://www.jobs.cz/prace/praha/'
                '?field%5B%5D=200900011&field%5B%5D=200900012'
                '&field%5B%5D=200900013&page=')
    link_attrs = {'class': 'search-list__main-info__title__link'}
    # Compiled once; the same pattern is applied to every page.
    title_pattern = re.compile('IT', re.IGNORECASE)

    # Open the file ONCE for the whole crawl: re-opening it in 'w' mode for
    # every page would truncate the links collected from earlier pages.
    # Non-ASCII characters (e.g. Czech job titles) are written as numeric
    # character references, so the file renders correctly whatever charset
    # the browser assumes.
    with open(file_name, 'w', encoding='ascii',
              errors='xmlcharrefreplace') as output:
        for page in range(1, max_pages):
            response = requests.get(base_url + str(page))
            # Name the parser explicitly so the result does not depend on
            # which optional parser libraries happen to be installed.
            soup = BeautifulSoup(response.text, 'html.parser')

            for link in soup.find_all('a', link_attrs, text=title_pattern):
                href = link.get('href')
                if not href:
                    # An anchor without a target is useless in the output.
                    continue
                title = link.get_text(strip=True)
                # Escape both values: they come from a remote page and are
                # inserted into HTML markup.
                output.write('<a href="%s">%s</a><br />\n'
                             % (html.escape(href), html.escape(title)))
                print('%s - %s' % (title, href))

    print('Saved to %s' % file_name)
if __name__ == '__main__':
    # Run the crawl only when executed as a script, so that importing this
    # module does not trigger network requests or overwrite links.html.
    jobs_crawler(5)