BeautifulSoup is for parsing the HTML obtained after sending an HTTP request; in your case you should:
1. Send an HTTP request to your target website with the requests module (with appropriate headers).
2. Extract the JSON data from the response.
3. Iterate over the list of products.
4. For each product, get its list of image URLs.
5. Send a new request to get each image in your list of urls.
6. Save the image.
Code :
Note: you should update the cookie in the headers to get a valid response.
import requests
from os.path import basename
from urllib.parse import urlparse
# GraphQL endpoint that serves the product/collection data for the storefront.
URL = 'https://atlas-main.kube.jooraccess.com/graphql'
# Browser-captured request headers. NOTE(review): the Cookie value is a
# session artifact and must be refreshed before running — TODO confirm it is
# still valid. The hard-coded Content-Length also looks copied from a captured
# POST and presumably does not match what requests actually sends; verify
# whether the server tolerates (or ignores) it.
headers = {"accept": "*/*","Accept-Encoding": "gzip, deflate, br","Accept-Language": "fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7","Connection": "keep-alive","Content-Length": "2249","content-type": "application/json","Cookie":"_hjSessionUser_686103=eyJpZCI6ImY4MTZjN2YxLWJlYmQtNTg2ZC1iYmRkLTllYjdhNGQzNmVjYiIsImNyZWF0ZWQiOjE2NDYxMTkwMDUyODcsImV4aXN0aW5nIjp0cnVlfQ==; _hjSession_686103=eyJpZCI6ImM5MWJmOGRhLTcwZDEtNGQ2ZS04MzA1LTQ4NWNlYTYzZGMwNSIsImNyZWF0ZWQiOjE2NDYxMjc3MDQ5MjgsImluU2FtcGxlIjp0cnVlfQ==; _hjAbsoluteSessionInProgress=0; mp_2e072c90929b30e1ea5d9fd56399f106_mixpanel=%7B%22distinct_id%22%3A%20%2217f4456c057375-062236d0c47071-a3e3164-144000-17f4456c05857f%22%2C%22%24device_id%22%3A%20%2217f4456c057375-062236d0c47071-a3e3164-144000-17f4456c05857f%22%2C%22%24initial_referrer%22%3A%20%22%24direct%22%2C%22%24initial_referring_domain%22%3A%20%22%24direct%22%2C%22accountId%22%3A%20null%2C%22canShop%22%3A%20false%2C%22canTransact%22%3A%20false%2C%22canViewAssortments%22%3A%20false%2C%22canViewDataPortal%22%3A%20false%2C%22userId%22%3A%20null%2C%22accountUserId%22%3A%20null%2C%22isAdmin%22%3A%20false%2C%22loggedAsAdmin%22%3A%20false%2C%22retailerSettings%22%3A%20false%2C%22assortmentPlanning%22%3A%20false%2C%22accountType%22%3A%201%7D","Host": "atlas-main.kube.jooraccess.com","Origin": "https://www.jooraccess.com","Referer": "https://www.jooraccess.com/","sec-ch-ua-mobile": "?0","sec-ch-ua-platform": "Windows","Sec-Fetch-Dest": "empty","Sec-Fetch-Mode": "cors","Sec-Fetch-Site": "same-site","User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",}
# Fetch the product listing. BUG FIX: the original called
# requests.get(URL, headers) — the second positional argument of
# requests.get is `params`, so the dict was sent as query-string
# parameters instead of HTTP headers. It must be passed by keyword.
# NOTE(review): this is a GraphQL endpoint, which usually expects a POST
# with a JSON query body — confirm GET actually returns data here.
result = requests.get(URL, headers=headers).json()

# Walk the GraphQL response down to the list of product edges.
data = result["data"]["public"]["collectionProductsByShareToken"]["edges"]

for d in data:
    img_urls = d["product"]["imageUrls"]
    for img_url in img_urls:
        # Derive the local filename from the URL path,
        # e.g. "https://cdn/.../abc.jpg" -> "abc.jpg".
        img_name = basename(urlparse(img_url).path)

        # Stream the image so large files are never held fully in memory.
        # (The original also did a full non-streaming download into an
        # unused variable first — that redundant second request is removed.)
        response = requests.get(img_url, stream=True)
        if not response.ok:
            # Skip failed downloads instead of writing an empty/garbage file.
            print(response)
            continue

        # Open the file only after the response is known to be good.
        with open(img_name, 'wb') as handle:
            for block in response.iter_content(1024):
                if not block:
                    break
                handle.write(block)