I'm web scraping a site for data using beautifulsoup4, and I'm not sure how to be specific to the data I want, without calling an unwanted object. I've failed to get rid of it.
import requests
from bs4 import BeautifulSoup
headers = {'User-agent': 'Mozilla/5.0 (Windows 10; Win64; x64; rv:101.0.1) Gecko/20100101 Firefox/101.0.1'}
url = "https://elitejobstoday.com/job-category/education-jobs-in-uganda/"
r = requests.get(url, headers = headers)
c = r.content
soup = BeautifulSoup(c, "html.parser")
table = soup.find("div", attrs={"article": "loadmore-item"})
def jobScan(link):
the_job = {}
job = requests.get(url, headers = headers)
jobC = job.content
jobSoup = BeautifulSoup(jobC, "html.parser")
name = jobSoup.find("h3", attrs={"class": "loop-item-title"})
title = name.a.text
the_job['title'] = title
print('The job is: {}'.format(title))
print(the_job)
return the_job
jobScan(table)
this is the result it fetches
PS C:\Users\MUHUMUZA IVAN\Desktop\JobPortal> py absa.py
The job is: 25 Credit Officers (Group lending) at ENCOT Microfinance Ltd
{'urlLink': 'https://elitejobstoday.com/job-category/education-jobs-in-uganda/', 'title': '25 Credit Officers (Group lending) at ENCOT Microfinance Ltd'}
I want to be able to retain "The job is: 25 Credit Officers (Group lending) at ENCOT Microfinance Ltd"
and drop "{'urlLink': 'https://elitejobstoday.com/job-category/education-jobs-in-uganda/', 'title': '25 Credit Officers (Group lending) at ENCOT Microfinance Ltd'}"