3

I have a JSON file containing nested arrays that looks something like the following:

[
  {
     "category1":"0120391123123"
  },
  [
     {
        "subcategory":"0120391123123"
     },
     [
        {
           "subsubcategory":"019301948109"
        },
        [
           {
              "subsubsubcategory":"013904123908"
           },
           [
              {
                 "subsubsubsubcategory":"019341823908"
              }
           ]
        ]
     ]
  ],
  [
     {
        "subcategory2":"0934810923801"
     },
     [
        {
           "subsubcategory2":"09341829308123"
        }
     ]
  ],
  [
     {
        "category2":"1309183912309"
     },
     [
        {
           "subcategory":"10293182094"
        }
     ]
  ]
]

I also have a list of category IDs that I would like to find in the original list. If a category's ID appears in categoriesToFind, I would like to return that ID together with the IDs of all of its subcategories, at every nesting depth.

# Category IDs to search for in the nested structure.
categoriesToFind = ['019301948109', '1309183912309']

# Accumulator that findCategories appends the collected IDs to.
finalCategories = []

def findCategories(currentList, isFirstIteration, targets=None, found=None):
    """Collect the IDs of wanted categories and all of their subcategories.

    ``currentList`` is a nested list in which each dict holds one category
    (its first value is the category ID) and each nested list holds the
    subcategories of the dicts that precede it in the same list.

    Args:
        currentList: The (sub)list to walk.
        isFirstIteration: True when no enclosing category has matched yet,
            so only IDs present in ``targets`` are collected. False when the
            walk is already inside a matched category, so every ID is
            collected.
        targets: IDs to look for. Defaults to the module-level
            ``categoriesToFind``.
        found: List the collected IDs are appended to. Defaults to the
            module-level ``finalCategories``.

    Returns:
        The ``found`` list, with IDs in document order.
    """
    if targets is None:
        targets = categoriesToFind
    if found is None:
        found = finalCategories

    inside_match = not isFirstIteration
    # Becomes True once a dict in *this* list has been collected, so that the
    # sibling lists that follow it are treated as its subcategories.
    matched_here = False

    for item in currentList:
        if isinstance(item, dict):
            if not item:
                # An empty dict carries no category ID; nothing to collect.
                continue
            value = next(iter(item.values()))
            if inside_match or value in targets:
                found.append(value)
                matched_here = True
        else:
            # Always descend: a wanted category may sit below a non-matching
            # one. The original only recursed behind a bounds check that was
            # never true, so nested levels were never visited.
            findCategories(
                item, not (inside_match or matched_here), targets, found
            )
    return found

# Start the search at the outermost list; True marks this as the first iteration.
# NOTE(review): `data` is not defined in this snippet — presumably the parsed JSON shown above.
findCategories(data, True)

I would want finalCategories to contain the following:

['019301948109', '013904123908', '019341823908', '1309183912309', '10293182094']
Evan Hessler
  • 297
  • 3
  • 19

1 Answer

3

You can use recursion with a generator:

# Category IDs whose entries (and everything nested beneath them) should be returned.
categoriesToFind = ['019301948109', '1309183912309']
# Sample input: every list holds single-key dicts (categories) followed by nested lists.
d = [{'category1': '0120391123123'}, [{'subcategory': '0120391123123'}, [{'subsubcategory': '019301948109'}, [{'subsubsubcategory': '013904123908'}, [{'subsubsubsubcategory': '019341823908'}]]]], [{'subcategory2': '0934810923801'}, [{'subsubcategory2': '09341829308123'}]], [{'category2': '1309183912309'}, [{'subcategory': '10293182094'}]]]
def get_subcategories(_d, _flag):
    """Lazily yield the IDs of wanted categories and of everything below them.

    Walks ``_d`` in order. A dict's first value is yielded when it is listed
    in ``categoriesToFind`` or when ``_flag`` is truthy (meaning an enclosing
    category already matched). Any non-dict item is walked recursively, and
    is treated as "inside a match" if ``_flag`` is truthy or an earlier dict
    in the same list was yielded.
    """
    matched_here = False
    for node in _d:
        if not isinstance(node, dict):
            # Nested level: inherit the match state accumulated so far.
            yield from get_subcategories(node, _flag or matched_here)
            continue
        category_id = tuple(node.values())[0]
        if category_id in categoriesToFind or _flag:
            yield category_id
            matched_here = True

# Materialize the generator and print it; False means no enclosing category has matched yet.
print(list(get_subcategories(d, False)))

Output:

['019301948109', '013904123908', '019341823908', '1309183912309', '10293182094']
Ajax1234
  • 69,937
  • 8
  • 61
  • 102
  • Thanks for the help! If anyone stumbles on this, you can do this in python 2.7.X with this: https://stackoverflow.com/questions/17581332/converting-yield-from-statement-to-python-2-7-code – Evan Hessler Nov 20 '18 at 04:22