I am unable to extract the content from this website. I have tried adding different headers, but I am still not able to scrape data from it.
import requests
from bs4 import BeautifulSoup
# Fetch one listing page, parse it as HTML, and print the parsed document.

# Page 2 of the New York companies listing.
seedURL = 'https://www.owler.com/location/new-york-companies?p=2'

# Send a desktop-browser User-Agent instead of the default python-requests one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}

# requests applies no timeout by default; without one a stalled connection
# would block this script forever, so cap the wait (in seconds).
req_content = requests.get(seedURL, headers=headers, timeout=30)

# Parse the raw response bytes with the lxml parser and dump the resulting tree.
data = BeautifulSoup(req_content.content, "lxml")
print(data)
This is the response that I get:
<!DOCTYPE html>
<html>
<head>
<meta content="NOINDEX, NOFOLLOW" name="ROBOTS"/>
<meta content="max-age=0" http-equiv="cache-control"/>
<meta content="no-cache" http-equiv="cache-control"/>
<meta content="0" http-equiv="expires"/>
<meta content="Tue, 01 Jan 1980 1:00:00 GMT" http-equiv="expires"/>
<meta content="no-cache" http-equiv="pragma"/>
<meta content="10; url=/distil_r_captcha.html?requestId=c4aceb58-d5b5-480d-a09f-dafd9cca7cbe&httpReferrer=%2Flocation%2Fnew-york-companies%3Fp%3D2" http-equiv="refresh"/>
<script type="text/javascript">
(function(window){
try {
if (typeof sessionStorage !== 'undefined'){
sessionStorage.setItem('distil_referrer', document.referrer);
}
} catch (e){}
})(window);
</script>
<script defer="" src="/owlerdstl.js" type="text/javascript"></script><style type="text/css">#d__fFH{position:absolute;top:-5000px;left:-5000px}#d__fF{font-family:serif;font-size:200px;visibility:hidden}#dqfubwvfuxfsxffus{display:none!important}</style></head>
<body>
<div id="distilIdentificationBlock"> </div>
</body>
</html>