1

I'm working on scraping from barchart.com using modified code from this stack overflow question:

The header and payload information are from the XHR of the website I was attempting to scrape.

from urllib.parse import unquote

geturl=r'https://www.barchart.com/options/highest-implied-volatility'
apiurl=r'https://www.barchart.com/proxies/core-api/v1/quotes/get'


getheaders={
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'cache-control': 'max-age=0',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
    }

getpay={
    'page': 'all'
}

s=requests.Session()
r=s.get(geturl,params=getpay, headers=getheaders)

headersIV = {
    'method': 'GET',
    'scheme': 'https',
    'authority': 'www.barchart.com',
    'Host' : 'www.barchart.com',
    'Accept': 'application/json',
    'Accept-Encoding': 'gzip, deflate, br',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
    'Accept-Language': 'en-us',
    'Referer': 'https://www.barchart.com/options/highest-implied-volatility',
    'Connection': 'keep-alive',
    'X-XSRF-TOKEN': 'eyJpdiI6Ik8vQTBkcGxZVVF1aG5QeE9TUnk5L3c9PSIsInZhbHVlIjoiMDd6STJyM1FPZEtMMFdLNEcrVjNNWUMva1l3WWxwblMvdEFZMEIzSllzalFySGFoblcyRzgrRmNZa1RMRHdZcTlBVExQTjBQUEhVdTVaNWhMZlJ0ZFM4c3ZaeHMvVmptM2FGQXJobnM1WTl1REx1d3M1eDI2RUc2SEtHY2wzTnUiLCJtYWMiOiIyNGExYjI3N2JkOGRiZGEwYjY4MTQ3OGFiYmYxZGE3ZmJhZmQyMDQwM2NiZTc0YTMzZDFkNjI4ZGIwZmY2YTU0In0=',
    'path': '/proxies/core-api/v1/options/get?fields=symbol%2CbaseSymbol%2CbaseLastPrice%2CbaseSymbolType%2CsymbolType%2CstrikePrice%2CexpirationDate%2CdaysToExpiration%2CbidPrice%2Cmidpoint%2CaskPrice%2ClastPrice%2Cvolume%2CopenInterest%2CvolumeOpenInterestRatio%2Cvolatility%2CtradeTime%2CsymbolCode%2ChasOptions&orderBy=volatility&baseSymbolTypes=stock&between(lastPrice%2C.10%2C)=&between(daysToExpiration%2C15%2C)=&between(tradeTime%2C2021-10-21%2C2021-10-22)=&orderDir=desc&between(volatility%2C60%2C)=&limit=200&between(volume%2C500%2C)=&between(openInterest%2C100%2C)=&in(exchange%2C(AMEX%2CNASDAQ%2CNYSE))=&meta=field.shortName%2Cfield.type%2Cfield.description&hasOptions=true&raw=1', 

}

payloadIV={
    'fields': 'symbol,baseSymbol,baseLastPrice,baseSymbolType,symbolType,strikePrice,expirationDate,daysToExpiration,bidPrice,midpoint,askPrice,lastPrice,volume,openInterest,volumeOpenInterestRatio,volatility,tradeTime,symbolCode,hasOptions',
    'orderBy': 'volatility',
    'baseSymbolTypes': 'stock',
    'between(lastPrice,.10,)':'',
    'between(daysToExpiration,15,)':'',
    'between(tradeTime,2021-10-21,2021-10-22)':'',
    'orderDir': 'desc',
    'between(volatility,60,)':'',
    'limit': '200',
    'between(volume,500,)':'',
    'between(openInterest,100,)':'',
    'in(exchange,(AMEX,NASDAQ,NYSE))':'',
    'meta': 'field.shortName,field.type,field.description',
    'hasOptions': 'true',
    'raw': '1'

}



r=s.get(apiurl,params=payloadIV,headers=headersIV)
j=r.json()
print(j)

It returns this error message: {'error': {'message': 'Internal error.', 'code': 500}}

I am pretty new to scraping data using API and XHR data. I think I might be doing many things correctly right now but I don't know where I might be making the mistake.

chem.cs
  • 53
  • 8

0 Answers0