Step 1: Get some HTML from a web page
Step 2: Use the Beautiful Soup package to parse the HTML (if you are new to Beautiful Soup, see https://pypi.org/project/beautifulsoup4/)
Step 3: List the elements that are not required (e.g. header, meta, script)
import requests
from bs4 import BeautifulSoup
# Page to scrape; replace with any URL.
url = 'https://www.zzz.com/yyy/'

# Fetch the page. The timeout keeps the script from blocking forever if the
# server never responds (requests has no default timeout).
res = requests.get(url, timeout=10)
html_page = res.content

# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(html_page, 'html.parser')

# Every text node in the document, in document order. `string=True` is the
# current spelling of the deprecated `text=True` argument.
text = soup.find_all(string=True)

# Names of parent tags whose text should be dropped from the output.
# '[document]' is the name of the root BeautifulSoup object itself.
blacklist = [
    '[document]',
    'noscript',
    'header',
    'html',
    'meta',
    'head',
    'input',
    'script',
    # name more elements if not required
]

# Keep only text whose immediate parent tag is not blacklisted. Each kept
# node is followed by a single space, and the pieces are joined in one pass
# instead of growing the string with repeated `+=`.
output = ''.join(
    '{} '.format(t) for t in text if t.parent.name not in blacklist
)
print(output)