I'm working on scraping from barchart.com using modified code from this stack overflow question:
The header and payload information are from the XHR of the website I was attempting to scrape.
from urllib.parse import unquote
geturl=r'https://www.barchart.com/options/highest-implied-volatility'
apiurl=r'https://www.barchart.com/proxies/core-api/v1/quotes/get'
getheaders={
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9',
'cache-control': 'max-age=0',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
}
getpay={
'page': 'all'
}
s=requests.Session()
r=s.get(geturl,params=getpay, headers=getheaders)
headersIV = {
'method': 'GET',
'scheme': 'https',
'authority': 'www.barchart.com',
'Host' : 'www.barchart.com',
'Accept': 'application/json',
'Accept-Encoding': 'gzip, deflate, br',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
'Accept-Language': 'en-us',
'Referer': 'https://www.barchart.com/options/highest-implied-volatility',
'Connection': 'keep-alive',
'X-XSRF-TOKEN': 'eyJpdiI6Ik8vQTBkcGxZVVF1aG5QeE9TUnk5L3c9PSIsInZhbHVlIjoiMDd6STJyM1FPZEtMMFdLNEcrVjNNWUMva1l3WWxwblMvdEFZMEIzSllzalFySGFoblcyRzgrRmNZa1RMRHdZcTlBVExQTjBQUEhVdTVaNWhMZlJ0ZFM4c3ZaeHMvVmptM2FGQXJobnM1WTl1REx1d3M1eDI2RUc2SEtHY2wzTnUiLCJtYWMiOiIyNGExYjI3N2JkOGRiZGEwYjY4MTQ3OGFiYmYxZGE3ZmJhZmQyMDQwM2NiZTc0YTMzZDFkNjI4ZGIwZmY2YTU0In0=',
'path': '/proxies/core-api/v1/options/get?fields=symbol%2CbaseSymbol%2CbaseLastPrice%2CbaseSymbolType%2CsymbolType%2CstrikePrice%2CexpirationDate%2CdaysToExpiration%2CbidPrice%2Cmidpoint%2CaskPrice%2ClastPrice%2Cvolume%2CopenInterest%2CvolumeOpenInterestRatio%2Cvolatility%2CtradeTime%2CsymbolCode%2ChasOptions&orderBy=volatility&baseSymbolTypes=stock&between(lastPrice%2C.10%2C)=&between(daysToExpiration%2C15%2C)=&between(tradeTime%2C2021-10-21%2C2021-10-22)=&orderDir=desc&between(volatility%2C60%2C)=&limit=200&between(volume%2C500%2C)=&between(openInterest%2C100%2C)=&in(exchange%2C(AMEX%2CNASDAQ%2CNYSE))=&meta=field.shortName%2Cfield.type%2Cfield.description&hasOptions=true&raw=1',
}
payloadIV={
'fields': 'symbol,baseSymbol,baseLastPrice,baseSymbolType,symbolType,strikePrice,expirationDate,daysToExpiration,bidPrice,midpoint,askPrice,lastPrice,volume,openInterest,volumeOpenInterestRatio,volatility,tradeTime,symbolCode,hasOptions',
'orderBy': 'volatility',
'baseSymbolTypes': 'stock',
'between(lastPrice,.10,)':'',
'between(daysToExpiration,15,)':'',
'between(tradeTime,2021-10-21,2021-10-22)':'',
'orderDir': 'desc',
'between(volatility,60,)':'',
'limit': '200',
'between(volume,500,)':'',
'between(openInterest,100,)':'',
'in(exchange,(AMEX,NASDAQ,NYSE))':'',
'meta': 'field.shortName,field.type,field.description',
'hasOptions': 'true',
'raw': '1'
}
r=s.get(apiurl,params=payloadIV,headers=headersIV)
j=r.json()
print(j)
It returns this error message: {'error': {'message': 'Internal error.', 'code': 500}}
I am pretty new to scraping data using API and XHR data. I think I might be doing many things correctly right now but I don't know where I might be making the mistake.