I have a solution if you want to have more freedom to choose when a value should be overwritten in the merged dictionary. Maybe it's a verbose script, but it's not hard to understand its logic.
Thanks to fabiocaccamo and senderle for sharing the benedict package and the nested iteration logic in lists, respectively. This knowledge was fundamental to the development of the script.
Python Requirements
pip install python-benedict==0.24.3
Python Script
Definition of the `Dict` class:
from __future__ import annotations
from collections.abc import Mapping
from benedict import benedict
from typing import Iterator
from copy import deepcopy
class Dict:
    """
    Dictionary wrapper whose items are addressed by a sequence of nested keys.

    The wrapped dictionary is stored on the ``data`` attribute as a deep copy
    of the constructor argument, so the operations of an instance never modify
    the dictionary it was created from.

    References
    ----------
    [1] 'Dict' class: https://stackoverflow.com/a/70908985/16109419
    [2] 'Benedict' package: https://github.com/fabiocaccamo/python-benedict
    [3] Dictionary nested iteration: https://stackoverflow.com/a/10756615/16109419
    """

    def __init__(self, data: dict = None):
        """
        Instantiates a dictionary object with nested keys-based indexing.

        Parameters
        ----------
        data: dict
            Dictionary. It is deep-copied; ``None`` yields an empty dictionary.
        """
        self.data = deepcopy(data) if data is not None else {}

    @staticmethod
    def _locate(keys, data, path):
        """
        Find the mapping that directly holds the item addressed by ``keys``.

        Parameters
        ----------
        keys:
            Full sequence of nested keys (list, tuple, ...).
        data:
            Mapping located at ``path``.
        path:
            Keys already traversed to reach ``data``; must be a strict prefix
            of ``keys`` for anything to be found.

        Returns
        -------
        (parent, key) or None
            The mapping holding the item and the item's own key, or ``None``
            when the item does not exist.
        """
        keys, path = list(keys), list(path)
        depth = len(path)
        if len(keys) <= depth or keys[:depth] != path:
            return None

        node = data
        remaining = keys[depth:]
        for index, key in enumerate(remaining):
            try:
                if key not in node:
                    return None
            except TypeError:
                # An unhashable key can never be present in a dictionary.
                return None
            if index == len(remaining) - 1:
                return node, key
            node = node[key]
            if not isinstance(node, Mapping):
                # Keys remain but there is nothing left to descend into.
                return None
        return None

    def get(self, keys: list[object], **kwargs) -> tuple[object, bool]:
        """
        Get dictionary item value based on nested keys.

        Parameters
        ----------
        keys: [object]
            Nested keys to get item value based on. Any sequence is accepted.
        **kwargs
            Optional ``data`` (mapping to search, defaults to ``self.data``)
            and ``path`` (keys already traversed to reach ``data``).

        Returns
        -------
        value, found: (object, bool)
            Item value, and whether the target item was found.
            ``(None, False)`` is returned when the item does not exist.
        """
        hit = self._locate(keys, kwargs.get('data', self.data), kwargs.get('path', []))
        if hit is None:
            return None, False
        parent, key = hit
        return parent[key], True

    def set(self, keys: list[object], value: object, **kwargs) -> bool:
        """
        Set dictionary item value based on nested keys.

        Only existing items are updated; use ``add`` to create new ones.

        Parameters
        ----------
        keys: [object]
            Nested keys to set item value based on. Any sequence is accepted.
        value: object
            Item value.
        **kwargs
            Optional ``data`` (mapping to search, defaults to ``self.data``)
            and ``path`` (keys already traversed to reach ``data``).

        Returns
        -------
        updated: bool
            Whether the target item was updated.
        """
        hit = self._locate(keys, kwargs.get('data', self.data), kwargs.get('path', []))
        if hit is None:
            return False
        parent, key = hit
        parent[key] = value
        return True

    def add(self, keys: list[object], value: object, **kwargs) -> bool:
        """
        Add dictionary item value based on nested keys.

        Missing intermediate levels are created as empty dictionaries. An
        existing item is never overwritten; use ``set`` for that.

        Parameters
        ----------
        keys: [object]
            Nested keys to add item based on. Any sequence is accepted.
        value: object
            Item value.
        **kwargs
            Optional ``data`` (mapping to add to, defaults to ``self.data``).

        Returns
        -------
        added: bool
            Whether the target item was added. ``False`` when ``keys`` is
            empty, when the item already exists, or when an intermediate key
            holds a non-mapping value.
        """
        node = kwargs.get('data', self.data)
        keys = list(keys)
        if not keys:
            return False

        for key in keys[:-1]:
            if key not in node:
                node[key] = {}
            node = node[key]
            if not isinstance(node, Mapping):
                return False

        last = keys[-1]
        if last in node:
            return False
        node[last] = value
        return True

    def remove(self, keys: list[object], **kwargs) -> bool:
        """
        Remove dictionary item based on nested keys.

        Parameters
        ----------
        keys: [object]
            Nested keys to remove item based on. Any sequence is accepted.
        **kwargs
            Optional ``data`` (mapping to search, defaults to ``self.data``)
            and ``path`` (keys already traversed to reach ``data``).

        Returns
        -------
        removed: bool
            Whether the target item was removed.
        """
        hit = self._locate(keys, kwargs.get('data', self.data), kwargs.get('path', []))
        if hit is None:
            return False
        parent, key = hit
        del parent[key]
        return True

    def items(self, **kwargs) -> Iterator[tuple[list[object], object]]:
        """
        Get dictionary items based on nested keys.

        Mappings are descended into rather than yielded, so only non-mapping
        values are produced (an empty nested mapping yields nothing).

        Parameters
        ----------
        **kwargs
            Optional ``data`` (mapping to iterate, defaults to ``self.data``)
            and ``path`` (keys to prepend to every yielded key list).

        Returns
        -------
        keys, value: Iterator[([object], object)]
            Pairs of nested keys (as a list) and the value stored there.
        """
        data = kwargs.get('data', self.data)
        path = kwargs.get('path', [])

        for outer_key, outer_value in data.items():
            if isinstance(outer_value, Mapping):
                yield from self.items(data=outer_value, path=path + [outer_key])
            else:
                yield path + [outer_key], outer_value

    @staticmethod
    def _keep_first(dict_list, default_value):
        """
        Prune later dictionaries so that earlier non-default values win.

        For every non-default value of a dictionary, the same nested keys are
        removed from all following dictionaries. A non-mapping value stored in
        a following dictionary at a prefix of those keys is removed as well,
        because it would otherwise replace the whole earlier subtree when the
        dictionaries are merged with overwriting enabled.

        Returns
        -------
        [dict]
            Pruned deep copies, in the original order.
        """
        wrapped = [Dict(d) for d in dict_list]

        for index, current in enumerate(wrapped[:-1]):
            later_dicts = wrapped[index + 1:]
            for keys, value in current.items():
                if value != default_value:
                    for later in later_dicts:
                        for depth in range(1, len(keys)):
                            blocker, found = later.get(keys=keys[:depth])
                            if found and not isinstance(blocker, Mapping):
                                later.remove(keys=keys[:depth])
                                break
                        later.remove(keys=keys)

        return [d.data for d in wrapped]

    @staticmethod
    def merge(dict_list: list[dict], overwrite: bool = False, concat: bool = False, default_value: object = None) -> dict:
        """
        Merges dictionaries, with value assignment based on order of occurrence. Overwrites values if and only if:
            - The key does not yet exist on merged dictionary;
            - The current value of the key on merged dictionary is the default value.

        Parameters
        ----------
        dict_list: [dict]
            List of dictionaries. Entries that are not ``dict`` instances are ignored.
        overwrite: bool
            Overwrites occurrences of values. If false, keep the first occurrence of each value found.
        concat: bool
            Concatenates occurrences of values for the same key (forwarded to ``benedict.merge``).
        default_value: object
            Default value used as a reference to override dictionary attributes.

        Returns
        -------
        md: dict
            Merged dictionary (a ``benedict`` instance).

        Raises
        ------
        AssertionError
            If no dictionary is left after filtering ``dict_list``.
        """
        dicts = [d for d in dict_list if isinstance(d, dict)] if dict_list is not None else []

        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if not dicts:
            raise AssertionError("no dictionaries given.")

        if overwrite:
            # Work on copies so the merge cannot alias or modify the caller's
            # nested dictionaries.
            dicts = [deepcopy(d) for d in dicts]
        else:
            # Keeping the first occurrence of each value:
            dicts = Dict._keep_first(dicts, default_value)

        md = benedict()
        md.merge(*dicts, overwrite=True, concat=concat)

        return md
Definition of the `main` function to show examples:
import json
def main() -> None:
    """
    Print a walkthrough of the 'Dict' class.

    Three sections are printed: single-item operations (get/set/add/remove),
    a check that every pair yielded by 'items' can be read back with 'get',
    and the merge of the sample dictionaries.
    """
    separator = "\n-----------------------------\n"
    samples = [
        {1: 'a', 2: None, 3: {4: None, 5: {6: None}}},
        {1: None, 2: None, 3: {4: 'c', 5: {6: {7: None}}}},
        {1: None, 2: 'b', 3: {4: None, 5: {6: {7: 'd'}}}},
        {1: None, 2: 'b', 3: {4: None, 5: {6: {8: {9: {10: ['e', 'f']}}}}}},
        {1: None, 2: 'b', 3: {4: None, 5: {6: {8: {9: {10: ['g', 'h']}}}}}},
    ]

    # --- Section 1: single-item operations on the last sample. ---
    nested = Dict(data=samples[-1])

    print("Dictionary operations test:\n")
    print(f"data = {json.dumps(nested.data, indent=4)}\n")
    print(f"d = Dict(data=data)")

    path, payload = [11], {12: {13: 14}}
    print(f"d.get(keys={path}) --> {nested.get(keys=path)}")
    print(f"d.set(keys={path}, value={payload}) --> {nested.set(keys=path, value=payload)}")
    print(f"d.add(keys={path}, value={payload}) --> {nested.add(keys=path, value=payload)}")

    path, payload = [11, 12, 13], 14
    print(f"d.add(keys={path}, value={payload}) --> {nested.add(keys=path, value=payload)}")
    payload = 15
    print(f"d.set(keys={path}, value={payload}) --> {nested.set(keys=path, value=payload)}")

    for path in ([11], [11, 12], [11, 12, 13], [11, 12, 13, 15]):
        print(f"d.get(keys={path}) --> {nested.get(keys=path)}")

    path = [2]
    for _ in range(2):
        print(f"d.remove(keys={path}) --> {nested.remove(keys=path)}")
    print(f"d.get(keys={path}) --> {nested.get(keys=path)}")

    print(separator)

    # --- Section 2: every yielded item must be retrievable by its keys. ---
    print("Dictionary values match test:\n")
    print(f"data = {json.dumps(nested.data, indent=4)}\n")
    print(f"d = Dict(data=data)")

    for path, expected in nested.items():
        actual, was_found = nested.get(keys=path)
        label = "found" if was_found else "not found"
        print(f"d{path} = {expected} == {actual} ({label}) --> {expected == actual}")

    print(separator)

    # --- Section 3: merge of all samples. ---
    print("Dictionaries merge test:\n")

    names = []
    for number, sample in enumerate(samples, start=1):
        print(f"d{number} = {sample}")
        names.append(f"d{number}")
    print(f"dict_list = [{', '.join(names)}]")

    merged = Dict.merge(dict_list=samples)

    print("\nmd = Dict.merge(dict_list=dict_list)")
    print("print(md)")
    print(json.dumps(merged, indent=4))
# Run the demonstration only when the file is executed as a script.
if __name__ == "__main__":
    main()
Output
Dictionary operations test:
data = {
"1": null,
"2": "b",
"3": {
"4": null,
"5": {
"6": {
"8": {
"9": {
"10": [
"g",
"h"
]
}
}
}
}
}
}
d = Dict(data=data)
d.get(keys=[11]) --> (None, False)
d.set(keys=[11], value={12: {13: 14}}) --> False
d.add(keys=[11], value={12: {13: 14}}) --> True
d.add(keys=[11, 12, 13], value=14) --> False
d.set(keys=[11, 12, 13], value=15) --> True
d.get(keys=[11]) --> ({12: {13: 15}}, True)
d.get(keys=[11, 12]) --> ({13: 15}, True)
d.get(keys=[11, 12, 13]) --> (15, True)
d.get(keys=[11, 12, 13, 15]) --> (None, False)
d.remove(keys=[2]) --> True
d.remove(keys=[2]) --> False
d.get(keys=[2]) --> (None, False)
-----------------------------
Dictionary values match test:
data = {
"1": null,
"3": {
"4": null,
"5": {
"6": {
"8": {
"9": {
"10": [
"g",
"h"
]
}
}
}
}
},
"11": {
"12": {
"13": 15
}
}
}
d = Dict(data=data)
d[1] = None == None (found) --> True
d[3, 4] = None == None (found) --> True
d[3, 5, 6, 8, 9, 10] = ['g', 'h'] == ['g', 'h'] (found) --> True
d[11, 12, 13] = 15 == 15 (found) --> True
-----------------------------
Dictionaries merge test:
d1 = {1: 'a', 2: None, 3: {4: None, 5: {6: None}}}
d2 = {1: None, 2: None, 3: {4: 'c', 5: {6: {7: None}}}}
d3 = {1: None, 2: 'b', 3: {4: None, 5: {6: {7: 'd'}}}}
d4 = {1: None, 2: 'b', 3: {4: None, 5: {6: {8: {9: {10: ['e', 'f']}}}}}}
d5 = {1: None, 2: 'b', 3: {4: None, 5: {6: {8: {9: {10: ['g', 'h']}}}}}}
dict_list = [d1, d2, d3, d4, d5]
md = Dict.merge(dict_list=dict_list)
print(md)
{
"1": "a",
"2": "b",
"3": {
"4": "c",
"5": {
"6": {
"7": "d",
"8": {
"9": {
"10": [
"e",
"f"
]
}
}
}
}
}
}