Assuming you are stuck with the current data representation, you can avoid a lot of trouble dealing with how nested it is by using the flatten_data function
from my answer here. It can transform your data structure into a dictionary like this:
{(0, 'class'): 'class_A',
(0, 'users', 'user_a', 0, 'email', 0): 'aaa1@email.com',
(0, 'users', 'user_a', 0, 'email', 1): 'aaa2@email.com',
(0, 'users', 'user_b', 0, 'email', 0): 'bbb1@email.com',
(1, 'class'): 'class_B',
(1, 'users', 'user_c', 0, 'email', 0): 'aaa1@email.com',
(1, 'users', 'user_c', 0, 'email', 1): 'ccc@email.com',
(1, 'users', 'user_d', 0, 'email', 0): 'ddd1@email.com'}
This is a bit easier to handle, since now each key is a sequence of indices, only some of which you care about, and each value is either the class name or an email address.
The following solution just goes over all fields and compares each value against the email you are looking for; the "class"
entries are skipped, since in this data everything else is an email.
# Sample input: one dict per class, mapping each user name to a list of
# records that hold that user's email addresses.
data = [
    {
        'users': {
            'user_a': [{'email': ['aaa1@email.com', 'aaa2@email.com']}],
            'user_b': [{'email': ['bbb1@email.com']}],
        },
        'class': 'class_A',
    },
    {
        'users': {
            'user_d': [{'email': ['ddd1@email.com']}],
            'user_c': [{'email': ['aaa1@email.com', 'ccc@email.com']}],
        },
        'class': 'class_B',
    },
]
# traverse and flatten_data are copied from https://stackoverflow.com/a/36582214/5827215
def traverse(obj, prev_path="obj", path_repr="{}[{!r}]".format):
    """Yield a ``(path, leaf)`` pair for every leaf nested inside *obj*.

    Dicts are descended by key and lists by index; any other object is
    treated as a leaf and yielded together with the path that reaches it.
    An empty dict or list contains no leaves, so it yields nothing.

    Args:
        obj: The nested structure (or a bare leaf) to walk.
        prev_path: The path accumulated so far. It is passed as the first
            argument to *path_repr* and yielded unchanged for a leaf.
        path_repr: Callable ``(prev_path, key_or_index) -> new_path`` used
            to extend the path by one step. The default builds strings
            such as ``"obj['a'][0]"``.

    Yields:
        ``(path, leaf)`` tuples, in the iteration order of the containers.
    """
    if isinstance(obj, dict):
        it = obj.items()
    elif isinstance(obj, list):
        it = enumerate(obj)
    else:
        # Not a container we descend into: this is a leaf.
        yield prev_path, obj
        return
    for k, v in it:
        # Recurse with the path extended by this key/index.
        yield from traverse(v, path_repr(prev_path, k), path_repr)
def _tuple_concat(tup, idx):
    """Return a new tuple holding the items of *tup* followed by *idx*.

    Used as the path-extension callable when paths are tuples of
    keys/indices rather than strings.
    """
    return (*tup, idx)
def flatten_data(obj):
    """Convert a nested dict/list structure into a flat dict with tuple keys.

    Each key is the sequence of dict keys and list indices needed to reach
    one leaf of *obj*; the value is that leaf. For example
    ``{"a": [10, 20]}`` becomes ``{("a", 0): 10, ("a", 1): 20}``.

    Empty dicts and lists contain no leaves and therefore contribute no
    entries to the result.
    """
    # Start from the empty path and extend it one key/index at a time.
    return dict(traverse(obj, (), _tuple_concat))
# !! THIS IS FOR YOU
def extract_groups(flattened_data, matching_email):
    """Yield ``(class_name, username)`` for every user owning *matching_email*.

    Args:
        flattened_data: Flat dict with tuple keys. Email entries are
            expected under paths like
            ``(0, 'users', 'user_b', 0, 'email', 0)`` and class names under
            ``(cls_idx, 'class')``.
        matching_email: The email address to look for.

    Yields:
        ``(class_name, username)`` tuples, in the iteration order of
        *flattened_data*.

    Raises:
        KeyError: If a matching entry belongs to a class index that has no
            ``(cls_idx, 'class')`` entry.
    """
    for path, elem in flattened_data.items():
        if elem != matching_email:
            continue
        # Only accept leaves reached through an 'email' key below the user
        # level. This skips the (cls_idx, 'class') entries as well as any
        # extra, non-email fields that merely happen to hold the same value
        # (including paths too short to contain a username at all).
        if "email" not in path[3:]:
            continue
        # path looks like (cls_idx, 'users', username, ..., 'email', ...)
        cls_idx, username = path[0], path[2]
        yield flattened_data[cls_idx, "class"], username
# Flatten the sample data once, then report every (class, user) pair that
# owns the address we are looking for, one pair per line.
new_data = flatten_data(data)
# To inspect the flattened structure: import pprint; pprint.pprint(new_data)
print("\n".join(map(str, extract_groups(new_data, "aaa1@email.com"))))
This does work for your sample, outputting:
('class_A', 'user_a')
('class_B', 'user_c')
But any extra fields would cause problems, since it would visit those thinking they are emails. So the extracting function should be written to rely on consistent structures in the data; using path[2]
to refer to the user id may not be stable, but there may be another way of writing it, etc.