I want to scrape the text of an `h3` element
with the class shown in the attached photo.
I modified the code based on the posted recommendation:
import urllib
import urllib.parse

import requests
# Browser-like default headers, applied to every request sent through the session.
default_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
    'Accept': '*/*',
    'Accept-Language': 'de,en-US;q=0.7,en;q=0.3',
    'Content-Type': 'application/json',
    'Origin': 'https://auth.fool.com',
    'Connection': 'keep-alive',
}

# A single session object is reused for all subsequent requests.
session = requests.session()
session.headers.update(default_headers)
# Step 1: request the login page. The final URL (after redirects) points at
# auth.fool.com and its query string carries the values the login POST needs.
response1 = session.get("https://www.fool.com/secure/login.aspx")
# Fail loudly on an HTTP error status; `assert` would be stripped under `python -O`.
response1.raise_for_status()
response1.cookies
#<RequestsCookieJar[Cookie(version=0, name='_csrf', value='8PrzU3pSVQ12xoLeq2y7TuE1', port=None, port_specified=False, domain='auth.fool.com', domain_specified=False, domain_initial_dot=False, path='/usernamepassword/login', path_specified=True, secure=True, expires=1609597114, discard=False, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False)]>

# Parse only the query component. Feeding the whole URL to parse_qs folds the
# scheme/host/path into the first key ("https://auth.fool.com/login?state").
params = urllib.parse.parse_qs(urllib.parse.urlsplit(response1.url).query)
params

# Step 2: build the login payload from the query parameters plus credentials.
payload = {
    "client_id": params["client"][0],
    "redirect_uri": "https://www.fool.com/premium/auth/callback/",
    "tenant": "fool",
    "response_type": "code",
    "scope": "openid email profile",
    "state": params["state"][0],
    "_intstate": "deprecated",
    "nonce": params["nonce"][0],
    "password": "XXX",
    "connection": "TMF-Reg-API",
    "username": "XXX",
}

# Step 3: post the payload as JSON. `json=` lets requests serialize the dict,
# so quotes or backslashes in any value (e.g. the password) are escaped
# correctly instead of producing an invalid hand-built JSON string.
url = "https://auth.fool.com/usernamepassword/login"
response2 = session.post(url, json=payload)
# NOTE(review): response2.cookies only holds cookies set by this response;
# inspect response2.status_code / response2.text and session.cookies to judge
# whether the login succeeded.
response2.cookies
#<RequestsCookieJar[]>
`response2.cookies` is empty, so it seems that the login fails.