0
# Outer quotes are single so the double quotes of the embedded dictionary need no escaping.
example_string = 'Bla bla {"value": "1"} bla bla'

Is there a way I can search through a string with the goal of extracting only the dictionary(s) without using regular expressions?

I am trying to extract a dictionary in a script tag of a website.

EDIT: I added the exact string below. I didn't want to use regex simply because I am not familiar with it and was looking for an easier way (performance is not an issue). I tried the suggested regex solution with no success.

"""        var spConfig = new Product.Config({"attributes":{"6993":{"id":"6993","code":"irs_02010201_asafstand","label":"Asafstand","multiselect":false,"options":[{"id":"289022","label":"72 mm","price":0,"oldPrice":-45.76,"products":["1212907","1280900"]},{"id":"289025","label":"92 mm","price":0,"oldPrice":-45.76,"products":["1280407","1280899","1289700","1289750"]}]},"6994":{"id":"6994","code":"irs_02010201_doornmaat","label":"Doornmaat","multiselect":false,"options":[{"id":"306161","label":"35 mm","price":0,"oldPrice":-45.76,"products":["1280899"]},{"id":"306205","label":"40 mm","price":0,"oldPrice":-45.76,"products":["1289750"]},{"id":"306192","label":"45 mm","price":0,"oldPrice":-45.76,"products":["1289700"]},{"id":"306194","label":"50 mm","price":0,"oldPrice":-45.76,"products":["1280900"]},{"id":"306034","label":"55 mm","price":0,"oldPrice":-45.76,"products":["1212907","1280407"]}]},"6995":{"id":"6995","code":"irs_02010201_krukhoogte","label":"Krukhoogte","multiselect":false,"options":[{"id":"306193","label":"1050 mm","price":0,"oldPrice":-45.76,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]},"6996":{"id":"6996","code":"irs_02010201_kruknoot","label":"Kruknoot","multiselect":false,"options":[{"id":"289046","label":"8 mm","price":0,"oldPrice":-45.76,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]},"6997":{"id":"6997","code":"irs_02010201_materiaa_eb7faf79","label":"Materiaal voorplaat","multiselect":false,"options":[{"id":"289083","label":"Verzinkt","price":0,"oldPrice":-45.76,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]},"6998":{"id":"6998","code":"irs_02010201_uitvoering","label":"Type 
sluitpunt","multiselect":false,"options":[{"id":"289092","label":"Rolnok","price":0,"oldPrice":-45.76,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]},"6999":{"id":"6999","code":"irs_02010201_uitvoering2","label":"Uitvoering","multiselect":false,"options":[{"id":"289111","label":"Standaard","price":0,"oldPrice":-45.76,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]},"7000":{"id":"7000","code":"irs_02010201_voorplaat","label":"Voorplaat","multiselect":false,"options":[{"id":"291379","label":"F16","price":0,"oldPrice":0.93,"products":["1212907","1280407","1280899","1280900","1289700","1289750"]}]}},"template":"#{price}\u00a0\u20ac","basePrice":0,"oldPrice":0,"productId":"1347328","chooseText":"Kies een optie","taxConfig":{"includeTax":true,"showIncludeTax":false,"showBothPrices":false,"defaultTax":0,"currentTax":0,"inclTaxTitle":"Incl. BTW"},"extra_attributes":{"additional_cost":{"1212907":0,"1280407":0,"1280899":0,"1280900":0,"1289700":0,"1289750":0},"additional_cost_description":{"1212907":null,"1280407":null,"1280899":null,"1280900":null,"1289700":null,"1289750":null},"inriver_item_art_number":{"1212907":"036553024","1280407":"036553014","1280899":"036353002","1280900":"036503006","1289700":"036453006","1289750":"A50704007"},"inriver_item_package_qty":{"1212907":"0","1280407":"0","1280899":"0","1280900":"0","1289700":"0","1289750":"0"},"inriver_item_sales_unit":{"1212907":"STUK","1280407":"STUK","1280899":"STUK","1280900":"STUK","1289700":"STUK","1289750":"STUK"},"inriver_item_supplier_art_nr":{"1212907":"6-30710-PU-0-1","1280407":"6-32158-05-0-1","1280899":"6-30710-AQ-0-1","1280900":"6-30710-PT-0-1","1289700":"6-32159-01-0-1","1289750":"6-32234-03-0-1"},"level_price":{"1212907":{"id":"54257","product_id":"1212907","a_price":"30.4700","b_price":"29.3400","c_price":"25.7900","d_price":"24.4800","e_price":"46.6900","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000",
"updated_at":"2018-08-14 21:32:55","_base_price_group":"B","_base_price":"29.3400","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"29.3400","C":"25.7900","D":"24.4800"}},"_final_price_group":"B","_final_price":"29.3400","promo_price":null},"1280407":{"id":"179868","product_id":"1280407","a_price":"34.5300","b_price":"32.2800","c_price":"29.1500","d_price":"26.9000","e_price":"56.9700","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000","updated_at":"2018-08-14 22:18:16","_base_price_group":"B","_base_price":"32.2800","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"32.2800","C":"29.1500","D":"26.9000"}},"_final_price_group":"B","_final_price":"32.2800","promo_price":null},"1280899":{"id":"179601","product_id":"1280899","a_price":"28.3000","b_price":"27.2600","c_price":"23.9600","d_price":"22.9600","e_price":"52.5100","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000","updated_at":"2018-08-14 22:18:16","_base_price_group":"B","_base_price":"27.2600","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"27.2600","C":"23.9600","D":"22.9600"}},"_final_price_group":"B","_final_price":"27.2600","promo_price":null},"1280900":{"id":"179602","product_id":"1280900","a_price":"31.7500","b_price":"30.5800","c_price":"26.8800","d_price":"25.5100","e_price":"48.6700","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000","updated_at":"2018-08-14 
22:18:16","_base_price_group":"B","_base_price":"30.5800","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"30.5800","C":"26.8800","D":"25.5100"}},"_final_price_group":"B","_final_price":"30.5800","promo_price":null},"1289700":{"id":"194219","product_id":"1289700","a_price":"32.0700","b_price":"29.8600","c_price":"26.3300","d_price":"24.2200","e_price":"45.7600","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000","updated_at":"2018-08-14 22:25:57","_base_price_group":"B","_base_price":"29.8600","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"29.8600","C":"26.3300","D":"24.2200"}},"_final_price_group":"B","_final_price":"29.8600","promo_price":null},"1289750":{"id":"194253","product_id":"1289750","a_price":"44.9200","b_price":"42.0000","c_price":"37.9200","d_price":"35.0000","e_price":"74.1100","preh":null,"staffle_a":"1.0000","staffle_b":"10.0000","staffle_c":"50.0000","staffle_d":"100.0000","updated_at":"2018-08-14 22:25:57","_base_price_group":"B","_base_price":"42.0000","_tier_price_options":{"group":{"B":"10.0000","C":"50.0000","D":"100.0000"},"price":{"B":"42.0000","C":"37.9200","D":"35.0000"}},"_final_price_group":"B","_final_price":"42.0000","promo_price":null}},"name":{"1212907":"6-30710-PU-0-1 SECURY EUROPA R4 55\/72\/8\/16 1050  ( KRUK )  4 ROLTAPPEN","1280407":"6-32158-05-0-1 EUROPA R4 55-92-8 F16 (1050) (KRUK BED.) 4 ROLNOKKEN","1280899":"6-30710-AQ-0-1 SECURY EUROPA 35-92-8-F16 R4 1050 (KRUK) 4 ROL (EX 036 35 30 01)","1280900":"6-30710-PT-0-1 SECURY EUROPA R4 50\/72\/8\/16 1050 (KRUK) 4 ROL. 
\/TAND","1289700":"6-32159-01-0-1 GU SECURY EUROPA S 45-92-8 F16 R4 - 1050 - 4 ROLNOKKEN (KRUKBED.)","1289750":"6-32234-03-0-1 GU EUROPA V 40-92-8 F16  R4"},"sku":{"1212907":"DVRDOAAVBC","1280407":"RGUAAAA8Z7","1280899":"RGUAAAALOP","1280900":"RGUAAAALOT","1289700":"SMPCRAAAD4","1289750":"SMPCRAAAGO"},"stock_type":{"1212907":"Stockartikel","1280407":"Stockartikel","1280899":"Stockartikel","1280900":"Stockartikel","1289700":"Stockartikel","1289750":"Bestelartikel"},"lecot_product_badge":{"1212907":null,"1280407":null,"1280899":null,"1280900":null,"1289700":null,"1289750":null},"resources":{"1212907":{"inriver_resource_technischetek":[{"value_id":"4768791","file":"\/1\/5\/157569_036553024_technischetekening_01.jpg","product_id":"1212907","label":"036553024_TechnischeTekening_01.eps","position":"0","disabled":"0","label_default":"036553024_TechnischeTekening_01.eps","position_default":"0","disabled_default":"0"}]},"1280899":{"inriver_resource_technischetek":[{"value_id":"4773546","file":"\/1\/5\/157670_036353002_foto_01.jpg","product_id":"1280899","label":"036353002_foto_01.jpg","position":"0","disabled":"0","label_default":"036353002_foto_01.jpg","position_default":"0","disabled_default":"0"}]},"1280900":{"inriver_resource_technischetek":[{"value_id":"4768957","file":"\/1\/5\/159563_036503006_technischetekening_01.jpg","product_id":"1280900","label":"036503006_TechnischeTekening_01.eps","position":"0","disabled":"0","label_default":"036503006_TechnischeTekening_01.eps","position_default":"0","disabled_default":"0"}]}},"is_special_price":{"1212907":false,"1280407":false,"1280899":false,"1280900":false,"1289700":false,"1289750":false}},"searchable_attributes":["sku","name","inriver_item_art_number","inriver_item_supplier_art_nr"],"stock_data":{"1212907":{"product_id":"1212907","recommended_sales_qty":"1.0000","required_sales_qty":null},"1280407":{"product_id":"1280407","recommended_sales_qty":"5.0000","required_sales_qty":null},"1280899":{"product_id":"1280899","recomme
nded_sales_qty":"1.0000","required_sales_qty":null},"1280900":{"product_id":"1280900","recommended_sales_qty":"1.0000","required_sales_qty":null},"1289700":{"product_id":"1289700","recommended_sales_qty":"1.0000","required_sales_qty":null},"1289750":{"product_id":"1289750","recommended_sales_qty":"1.0000","required_sales_qty":null}}});
"""
Bjorn Garcia
  • 339
  • 3
  • 5
  • 2
    Why you can't use regular expressions? – toti08 Aug 15 '18 at 15:36
  • 2
    Is there a reason you don't want to use regex? – rahlf23 Aug 15 '18 at 15:36
  • Is there always going to be exactly one embedded dictionary, and no other braces in the string? – abarnert Aug 15 '18 at 15:40
  • Regular expressions cannot parse arbitrary JavaScript. – chepner Aug 15 '18 at 15:41
  • There are quite a few references if you simply search "string to dict python" on Stack Overflow. How is it that none of these solved your problem, or allowed you to write any code for this? – Prune Aug 15 '18 at 15:43
  • 1
    you can use a stack to find matching braces and then parse the dict with `ast.literal_eval` – Joran Beasley Aug 15 '18 at 15:43
  • Possible duplicate of [Convert a String representation of a Dictionary to a dictionary?](https://stackoverflow.com/questions/988228/convert-a-string-representation-of-a-dictionary-to-a-dictionary) – colidyre Aug 15 '18 at 15:49
  • 1
    @JoranBeasley A stack doesn’t add anything over a simple count if you only have one kind of thing to match. Also, unless you have some reason to believe the string comes from a Python repr, there’s no reason to use `literal_eval`. It seems more likely to be a string of arbitrary code in JS than in Python—so either JSON works or nothing is any more likely to work short of writing a parser (and maybe even an interpreter). – abarnert Aug 15 '18 at 15:58
  • 1
    "I tried the suggested solution with regex with no succes." But did you try using `json.loads(...)` instead of `ast.literal_eval`? – cs95 Aug 15 '18 at 16:16
  • It works if I copy my string from this webpage, but not when I programmatically retrieve it through my script. Could it have something to do with linebreaks and the regex? – Bjorn Garcia Aug 15 '18 at 16:37
  • @abarnert ... sure a count is fine ... I was just anticipating next he would maybe want to find lists or something ... or strings :) and you're definitely right that json.loads would probably work thanks :) – Joran Beasley Aug 15 '18 at 17:05

3 Answers

2

Assuming there’s never more than one dictionary in the string, and there are no other braces, and the dictionary is in a JSON-compatible format (all of which are true for your example, but I have no idea if they’re true for all of your real data), you can just use simple string operations to extract it:

# Take everything from the first '{' through the last '}' and parse it as JSON.
# Relies on `s` holding exactly one JSON object and no other braces.
first_open = s.find('{')
last_close = s.rfind('}')
# Only parse when a closing brace actually follows the first opening brace.
if first_open != -1 and last_close > first_open:
    d = json.loads(s[first_open:last_close + 1])

If those assumptions aren’t true, it gets a lot harder.

First, you need to extract matching brace pairs. Something like this:

def iter_brace_spans(s):
    """Yield each outermost ``{...}`` substring of ``s``, in order of appearance.

    Nested braces are kept inside the span of their outermost pair. A ``}``
    that has no matching ``{`` is ignored, and a span that is never closed is
    not yielded. Braces are counted blindly: a brace inside a quoted string
    is treated like any other brace.
    """
    braces = 0      # current nesting depth
    start = None    # index of the '{' that opened the current outermost span
    for i, c in enumerate(s):
        if c == '{':
            if not braces:
                start = i
            braces += 1
        elif c == '}':
            if not braces:
                # Stray closer: skip it so the depth never goes negative,
                # which would desynchronise every later match.
                continue
            braces -= 1
            if not braces:
                yield s[start:i + 1]

Except that if there can be braces inside strings, you need to skip them—and you need to handle it in a way that works for strings in JavaScript or whatever your source language actually is, including the appropriate backslash-escaped quote rules and so on. And if your string can have HTML entity escapes for braces (and quotes) you need to deal with that, too.

And then you have to parse each dict. If they’re, say, strings of arbitrary JavaScript code, you can’t just parse them as JSON, or as Python dicts, because neither of those will handle perfectly valid JS objects like, say, {abc: 1} (notice no quotes around the key), so you’ll need to write your own parser.

And if the dicts aren’t even necessarily literals—e.g., {abc: spam}, where spam is the name of a variable, or a nontrivial expression, or something like that—then you’ll need to use a JS interpreter, feed it the appropriate environment, then interpret the code.

abarnert
  • 354,177
  • 51
  • 601
  • 671
0

Without using a regex, you have to iterate over the string until you see the first { and then save everything after that up to the closing } (or use str.partition for that).

If there is a dict nested inside the dict, you also have to count the opening braces and only stop saving once the corresponding closing brace is found.

If there are unmatched braces, that is a problem anyway and not simple to solve. Better to stick with a regex solution: https://stackoverflow.com/a/51861961/2648551

colidyre
  • 4,170
  • 12
  • 37
  • 53
0

you can use a stack to find matching braces without using a regex

import ast
def find_dicts(s):
    """Yield every outermost ``{...}`` literal embedded in ``s`` as a parsed object.

    A stack of opening-brace positions is used to pair braces, so nested
    dictionaries stay inside their outermost dictionary and several
    dictionaries in one string are yielded one after another.

    Each span is parsed as JSON first, so ``true``/``false``/``null`` are
    accepted. If that fails, it is parsed with ``ast.literal_eval`` so
    Python-style literals (single quotes, tuples, ...) keep working.

    A ``}`` without a matching ``{`` is ignored. Braces inside quoted strings
    are not recognised as such and are counted like any other brace.

    Raises whatever ``ast.literal_eval`` raises when a span is neither valid
    JSON nor a valid Python literal.
    """
    import json  # local import: only ``ast`` is imported alongside this snippet

    stack = []  # indices of the '{' characters that are still open
    for i, ch in enumerate(s):
        if ch == "{":
            stack.append(i)
        elif ch == "}":
            if not stack:
                # Stray closer outside any dictionary: nothing to pop.
                continue
            start = stack.pop()
            if not stack:
                # The outermost brace just closed; slice the span out in one go
                # instead of building it character by character.
                chunk = s[start:i + 1]
                try:
                    parsed = json.loads(chunk)
                except ValueError:
                    parsed = ast.literal_eval(chunk)
                yield parsed


print(list(find_dicts("""Bla bla {"value": "1"} bla bla""")))

I think this should also handle nested dicts https://repl.it/repls/JuicyWoodenTranslation

as well as more than one dict in the string

Joran Beasley
  • 110,522
  • 12
  • 160
  • 179