0

I want to scrape the `var combinations` JavaScript variable from a web page. My usual scraping approach of looking elements up by their attributes doesn't work here. I just want to print out the whole `var combinations` line. This is my code, which doesn't work:

soup = BeautifulSoup(test1.text, 'html.parser')
        data = soup.find_all('combinations ').string

And I want to scrape this:

var combinations = {"94128":{"attributes_values":{"6":"US 4 EU 36"},"attributes":[246],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T036","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'246'"},"94129":{"attributes_values":{"6":"US 4,5 EU 37"},"attributes":[247],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T037","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'247'"},"94130":{"attributes_values":{"6":"US 5 EU 37,5"},"attributes":[248],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T375","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'248'"},"94131":{"attributes_values":{"6":"US 5,5 EU 38"},"attributes":[249],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T038","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'249'"},"94132":{"attributes_values":{"6":"US 6 EU 39"},"attributes":[250],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T039","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'250'"},"94133":{"attributes_values":{"6":"US 6,5 EU 39,5"},"attributes":[251],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T395","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'251'"},"94134":{"attributes_values":{"6":"US 7 EU 40"},"attributes":[252],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T040","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'252'"},"94135":{"attributes_values":{"6":"US 7,5 EU 
40,5"},"attributes":[253],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T405","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'253'"},"94136":{"attributes_values":{"6":"US 8 EU 41,5"},"attributes":[254],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T415","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'254'"},"94137":{"attributes_values":{"6":"US 8,5 EU 42"},"attributes":[255],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T042","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'255'"},"94138":{"attributes_values":{"6":"US 9 EU 42,5"},"attributes":[256],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T425","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'256'"},"94139":{"attributes_values":{"6":"US 9,5 EU 43,5"},"attributes":[257],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T435","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'257'"},"94140":{"attributes_values":{"6":"US 10 EU 44"},"attributes":[258],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T044","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'258'"},"94141":{"attributes_values":{"6":"US 10,5 EU 44,5"},"attributes":[259],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T445","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'259'"},"94142":{"attributes_values":{"6":"US 11 EU 
45"},"attributes":[260],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T045","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'260'"},"94143":{"attributes_values":{"6":"US 11,5 EU 46"},"attributes":[261],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":1,"reference":"ASI006028T046","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'261'"},"94144":{"attributes_values":{"6":"US 12 EU 46,5"},"attributes":[262],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T465","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'262'"},"94145":{"attributes_values":{"6":"US 12.5 EU 47"},"attributes":[263],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T047","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'263'"},"94146":{"attributes_values":{"6":"US 13 EU 48"},"attributes":[789],"price":0,"specific_price":false,"ecotax":0,"weight":0,"quantity":0,"reference":"ASI006028T048","unit_impact":null,"minimal_quantity":"1","available_date":"","hideonline":null,"id_image":-1,"list":"'789'"}};
var combinationsFromController = {"94128":{"attributes_values":{"6":"US 4 EU 36"},"attribu
finefoot
  • 9,914
  • 7
  • 59
  • 102
Pytronik
  • 69
  • 1
  • 7
  • `BeautifulSoup` works with `HTML/XML` but you have text in JavaScript - it will not search in JavaScript. You get JavaScript as string so use string functions to get it - `split()` , slicing `[start:end]`, regex, etc. – furas Feb 14 '20 at 00:34
  • 1
    What is the issue, exactly? Stack Overflow is not a free code writing service. See: [tour], [ask], [help/on-topic], https://meta.stackoverflow.com/questions/261592/how-much-research-effort-is-expected-of-stack-overflow-users. – AMC Feb 14 '20 at 01:45
  • What exactly is your desired output? – Jack Fleeting Feb 14 '20 at 12:34

1 Answer

2

While it is generally not recommended to parse webpages with regular expressions (see "Using regular expressions to parse HTML: why not?"), in your case you're interested in JavaScript code, not HTML. So a quick and easy solution might be to use the `re` module for this, for example:

>>> import re
>>> webpage = """some
... other
... javascript
... code
... var combinations = {"example": [1, 2, 3]};
... var combinationsFromController = {"example": [4, 5, 6]};
... some
... other
... javascript
... code"""
>>> re.findall(r"var combinations = .*", webpage)
['var combinations = {"example": [1, 2, 3]};']
>>> re.findall(r"var combinationsFromController = .*", webpage)
['var combinationsFromController = {"example": [4, 5, 6]};']
finefoot
  • 9,914
  • 7
  • 59
  • 102