Can you please help me? I have been thinking about this for a long time but could not work out what to write :(
**I need two values: asin and price**
asin = # I need the value that is between <div data-asin="
and "
in the webpage source code
price = # I need the value that is between <span class="a-price" data-a-size="l" data-a-color="base"><span class="a-offscreen">SAR
and </span>
in the webpage source code
from bs4 import BeautifulSoup as soup
from concurrent.futures import ThreadPoolExecutor
import requests
import time
# Maximum number of worker threads used to download pages concurrently.
number_of_threads = 6
# Output file name: a fixed prefix followed by the start time as YYYYmmdd-HHMMSS.
out_filename = time.strftime('soldbysouq-shopanddodandsupermarket' + "%Y%m%d-%H%M%S")
# CSV header line. No trailing comma, so the header has the same two fields
# (price, asin) as every data row produced by extract_data_from_url_func.
headers = "price,asin\n"
def extract_data_from_url_func(url):
    """Download *url* and return its products as ``price,asin`` CSV lines.

    Every ``<div>`` with a non-empty ``data-asin`` attribute is treated as one
    product. Its price is the text of the ``<span class="a-offscreen">`` found
    inside ``<span class="a-price" data-a-size="l" data-a-color="base">``,
    with the leading ``SAR`` currency marker removed.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A string holding one ``price,asin`` line per product, each terminated
        by a newline. The price field is empty when a product has no matching
        price element. The string is empty when the page has no products.
    """
    print(url)
    response = requests.get(url)
    page_soup = soup(response.text, "html.parser")

    # An attribute value of True matches any div that carries the attribute,
    # whatever its value.
    containers = page_soup.find_all("div", attrs={"data-asin": True})

    rows = []
    for container in containers:
        asin = container["data-asin"].strip()
        if not asin:
            # A div with an empty data-asin does not identify a product.
            continue

        price = ""
        price_box = container.find(
            "span",
            attrs={"class": "a-price", "data-a-size": "l", "data-a-color": "base"},
        )
        if price_box is not None:
            price_tag = price_box.find("span", class_="a-offscreen")
            if price_tag is not None:
                price = price_tag.get_text(strip=True)
                if price.startswith("SAR"):
                    price = price[len("SAR"):]
                # strip() also removes a non-breaking space after the currency;
                # commas are dropped so the value cannot split the CSV row.
                price = price.strip().replace(",", "")

        output_list = [price, asin]
        rows.append(",".join(output_list) + "\n")

    output = "".join(rows)
    print(output)
    return output
# Read the page URLs, one per line. Blank lines are skipped so that an empty
# string is never handed to the downloader.
with open("amazonlist1.csv", "r") as fr:
    URLS = [line.strip() for line in fr if line.strip()]

# Fetch and parse the pages concurrently; executor.map yields the results in
# the same order as URLS.
with ThreadPoolExecutor(max_workers=number_of_threads) as executor:
    results = executor.map(extract_data_from_url_func, URLS)
    responses = list(results)

# Write the header followed by every page's rows.
with open(out_filename, "w", encoding='utf-8-sig') as fw:
    fw.write(headers)
    for response in responses:
        if not response:
            # Nothing was extracted from this page; do not emit a blank line.
            continue
        # Each result normally ends with a newline already. Add one only when
        # it is missing, so no empty lines appear between pages.
        fw.write(response if response.endswith("\n") else response + "\n")