-1

I have a list of paths that needs to be converted to a nested dict:

[
    "/company/accounts/account1/accountId=11111",
    "/company/accounts/account1/accountName=testacc",
    "/company/accounts/account1/environment=test",
    "/company/accounts/account2/accountId=22222",
    "/company/accounts/account2/accountName=stageacc",
    "/company/accounts/account2/environment=stage",
    "/program/releases/program1/stage/version=1.1",
    "/program/releases/program1/stage/date=2021-02-01",
    "/program/releases/program1/prod/version=1.0",
    "/program/releases/program1/prod/date=2021-01-15",
]

Here is what it should look like:

{
    "company": {
        "accounts": {
            "account1": {
                "accountId": 11111,
                "accountName": "testacc",
                "environment": "test"
            },
            "account2": {
                "accountId": 22222,
                "accountName": "stageacc",
                "environment": "stage"
            }
        }
    },
    "program": {
        "releases": {
            "program1": {
                "stage": {
                    "version": "1.1",
                    "date": "2021-02-01"
                },
                "prod": {
                    "version": "1.0",
                    "date": "2021-01-15"
                }
            }
        }
    }
}

I am trying to solve this iteratively, but I can't seem to get it to work. I'm not sure what the right approach is when it comes to nested dictionaries.

Here is my code:

class Deserialize:
    """Build one nested dict out of slash-separated ``key=value`` paths.

    The tree lives on ``self.obj``, so repeated calls to
    :meth:`deserialize_iteratively` keep merging into the same result.
    """

    def __init__(self):
        self.obj = {}

    def deserialize_iteratively(self, paths):
        """Merge every path in *paths* into ``self.obj`` and return it.

        Each path looks like ``/a/b/c/key=value``. Components without an
        ``=`` name nested dicts; a component containing ``=`` is stored as
        a key/value pair in the dict reached so far. Values are kept as
        strings; no type conversion is attempted.

        Args:
            paths: Iterable of path strings.

        Returns:
            The accumulated nested dict (the same object as ``self.obj``).

        Raises:
            ValueError: If a path tries to descend through a name that
                already holds a plain value instead of a dict.
        """
        for path in paths:
            # Always restart from the root: every path is absolute.
            node = self.obj
            for element in path.split('/'):
                if not element:
                    # Leading, trailing or doubled slashes yield empty
                    # components; they do not name a level in the tree.
                    continue
                if "=" in element:
                    # partition() splits on the first "=" only, so values
                    # that themselves contain "=" stay intact.
                    key, _, value = element.partition("=")
                    node[key] = value
                else:
                    # Reuse the existing sub-dict or create it on demand.
                    child = node.setdefault(element, {})
                    if not isinstance(child, dict):
                        raise ValueError(
                            f"cannot descend into {element!r} in {path!r}: "
                            "it already holds a value"
                        )
                    node = child
        return self.obj

And the erroneous output this generates with first two paths:

{'': {'company': {'accounts': {'account1': {'': {'company': {'accounts': {'account1': {'accountId': '11111'}}}},
                                            'accountName': 'testacc'}}}}}
Savvy
  • 82
  • 1
  • 8
  • Please supply the expected [minimal, reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) (MRE). We should be able to copy and paste a contiguous block of your code, execute that file, and reproduce your problem along with tracing output for the problem points. This lets us test our suggestions against your test data and desired output. "faulty" is not a problem specification. – Prune May 07 '21 at 19:23

3 Answers

4

You'll want to use .setdefault() to create the dicts as you dig through the path.

from pprint import pprint

# Input paths: "/<dir>/<dir>/.../<key>=<value>".
s = [
    "/company/accounts/account1/accountId=11111",
    "/company/accounts/account1/accountName=testacc",
    "/company/accounts/account1/environment=test",
    "/company/accounts/account2/accountId=22222",
    "/company/accounts/account2/accountName=stageacc",
    "/company/accounts/account2/environment=stage",
    "/program/releases/program1/stage/version=1.1",
    "/program/releases/program1/stage/date=2021-02-01",
    "/program/releases/program1/prod/version=1.0",
    "/program/releases/program1/prod/date=2021-01-15",
]

# The nested result; every path is merged into this one dict.
root = {}

for raw_path in s:
    # Drop the leading slash(es), then peel the trailing "key=value" leaf
    # off the names of the dicts that lead to it.
    *parents, leaf = raw_path.lstrip("/").split("/")
    key, _, val = leaf.partition("=")

    # Walk down from the root, creating each level the first time it is seen.
    node = root
    for name in parents:
        node = node.setdefault(name, {})

    # Store the leaf in the innermost dict.
    node[key] = val

pprint(root)

Outputs:

{'company': {'accounts': {'account1': {'accountId': '11111',
                                       'accountName': 'testacc',
                                       'environment': 'test'},
                          'account2': {'accountId': '22222',
                                       'accountName': 'stageacc',
                                       'environment': 'stage'}}},
 'program': {'releases': {'program1': {'prod': {'date': '2021-01-15',
                                                'version': '1.0'},
                                       'stage': {'date': '2021-02-01',
                                                 'version': '1.1'}}}}}
AKX
  • 152,115
  • 15
  • 115
  • 172
  • Thank you for the very readable answer. However, I am not able to understand the part where you find the target dict. Could you explain what is happening there please :) – Savvy May 07 '21 at 20:57
  • 1
    `setdefault()` either gets something with the given key, or if it doesn't exist, it assigns the given value to it before returning it. So, starting from the root, this basically iterates down to create the dict structure before finally assigning the final key/value leaf. – AKX May 07 '21 at 21:03
3

You can use recursion with collections.defaultdict:

import collections as cl, re, json
def to_tree(data):
    """Fold rows of path components into a nested dict.

    Each row of *data* is a sequence ``[name, name, ..., key, value]``.
    Rows are grouped by their first element and the remainders are
    processed recursively, so rows sharing a prefix end up in the same
    sub-dict.

    Args:
        data: Iterable of non-empty sequences of path components.

    Returns:
        A dict mapping each first component either to its value (when only
        the value is left) or to the nested dict built from the remainders.
    """
    d = cl.defaultdict(list)
    for a, *b in data:
        d[a].append(b)
    # A key is a leaf only when its single remaining row holds nothing but
    # the value. Checking len(b) alone would collapse a branch that merely
    # has one descendant (e.g. a lone path a/b/c=1) into {"a": "b"}.
    return {
        a: b[0][0] if len(b) == 1 and len(b[0]) <= 1 else to_tree(b)
        for a, b in d.items()
    }

# Input paths: "/<dir>/<dir>/.../<key>=<value>".
vals = [
    '/company/accounts/account1/accountId=11111',
    '/company/accounts/account1/accountName=testacc',
    '/company/accounts/account1/environment=test',
    '/company/accounts/account2/accountId=22222',
    '/company/accounts/account2/accountName=stageacc',
    '/company/accounts/account2/environment=stage',
    '/program/releases/program1/stage/version=1.1',
    '/program/releases/program1/stage/date=2021-02-01',
    '/program/releases/program1/prod/version=1.0',
    '/program/releases/program1/prod/date=2021-01-15',
]
# Split every path on "/" and "=" and drop the empty piece produced by the
# leading slash. The pattern is a raw string: "\=" in a normal string literal
# is an invalid escape sequence and triggers a warning on current Python.
result = to_tree([[*filter(None, re.split(r'=|/', i))] for i in vals])
print(json.dumps(result, indent=4))

Output:

{
    "company": {
        "accounts": {
            "account1": {
                "accountId": "11111",
                "accountName": "testacc",
                "environment": "test"
            },
            "account2": {
                "accountId": "22222",
                "accountName": "stageacc",
                "environment": "stage"
            }
        }
    },
    "program": {
        "releases": {
            "program1": {
                "stage": {
                    "version": "1.1",
                    "date": "2021-02-01"
                },
                "prod": {
                    "version": "1.0",
                    "date": "2021-01-15"
                }
            }
        }
    }
}
Ajax1234
  • 69,937
  • 8
  • 61
  • 102
2
from collections import defaultdict
import json

def nested_dict():
    """
    Return a defaultdict whose missing keys are filled with further
    defaultdicts of the same kind, to any depth.
    """
    tree = defaultdict(nested_dict)
    return tree

def default_to_regular(d):
    """
    Recursively turn a tree of defaultdicts into plain dicts.

    Anything that is not a defaultdict is handed back untouched.
    """
    if not isinstance(d, defaultdict):
        return d
    plain = {}
    for key, child in d.items():
        plain[key] = default_to_regular(child)
    return plain

def get_path_dict(paths):
    """
    Build a plain nested dict from '/'-separated paths.

    Every component except the last becomes a nested dict level; the last
    component is stored under its own name, mapped to itself.
    """
    tree = nested_dict()
    for path in paths:
        parts = path.split('/')
        if not parts:
            continue
        *folders, leaf = parts
        # Indexing the nested defaultdict creates missing levels on the fly.
        cursor = tree
        for folder in folders:
            cursor = cursor[folder]
        cursor[leaf] = leaf
    return default_to_regular(tree)

# Sample file paths, one per line, to show the resulting tree.
l1 = [
    'foo/e.txt',
    'foo/bar/a.txt',
    'foo/bar/b.cfg',
    'foo/bar/c/d.txt',
    'test.txt',
]
result = get_path_dict(l1)
# Pretty-print the tree as JSON.
print(json.dumps(result, indent=2))
ZahraRezaei
  • 251
  • 2
  • 14