1

Hi, I am trying to denormalize/flatten a JSON document into a dictionary. Below is a generic function for flattening the JSON, but it somehow does not take care of nested JSON. The goal is to flatten the nested dict/JSON into CSV-compatible objects.

I've used the following code:

def flatten(s):
    """Yield ``(key, value)`` pairs for every non-dict value found in ``s``.

    Values that are dicts are descended into recursively and their own
    pairs are yielded in place; the key of the nested dict itself is
    dropped. Any other value, including a list, is yielded unchanged.
    """
    for key in s:
        value = s[key]
        if isinstance(value, dict):
            # Recurse: only the leaves of nested dicts are reported.
            yield from flatten(value)
        else:
            yield (key, value)


# Collapse the (key, value) pairs produced by flatten() into a single dict;
# dict() consumes the generator directly, later duplicates overwrite earlier.
new_data = dict(flatten(mydict))

print(new_data)

With this input dictionary

# Sample input: one transaction header whose 'G_LINES' list holds the line
# items. It must be a real dict (not a quoted string) for flatten() to walk it.
mydict = {
  'G_TRANSACTIONS': {
    'INVOICE_NUMBER': '31002',
    'TRANSACTION_CLASS': 'Invoice',
    'LIST_G_LINES': {
      'G_LINES': [
        {
          'LN_LINE_NUMBER': '1',
          'LN_LINE_TYPE': 'Line',
          'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
          'LN_UNIT_OF_MEASURE': 'EA',
          'LN_QUANTITY': '3',
          'LN_NET_SELLING_PRICE': '200',
          'LN_NET_EXTENDED_AMOUNT': '600',
          'LIST_G_LINES_ITEM_NUMBER': {
            'G_LINES_ITEM_NUMBER': {
              'ITEM_NUMBER': '136864001'
            }
          }
        },
        {
          'LN_LINE_NUMBER': '1',
          'LN_LINE_TYPE': 'Other Line',
          'LN_DESCRIPTION': 'Johar Town Lahore',
          'LN_UNIT_OF_MEASURE': 'EA',
          'LN_QUANTITY': '3',
          'LN_NET_SELLING_PRICE': '200',
          'LN_NET_EXTENDED_AMOUNT': '999',
          'LIST_G_LINES_ITEM_NUMBER': {
            'G_LINES_ITEM_NUMBER': {
              'ITEM_NUMBER': '99999999'
            }
          }
        }
      ]
    },
    'TR_LN_AMOUNT': '600'
  }
}

The output I get is:

{
  'INVOICE_NUMBER': '31002',
  'TRANSACTION_CLASS': 'Invoice',
  'G_LINES': [
    {
      'LN_LINE_NUMBER': '1',
      'LN_LINE_TYPE': 'Line',
      'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
      'LN_UNIT_OF_MEASURE': 'EA',
      'LN_QUANTITY': '3',
      'LN_NET_SELLING_PRICE': '200',
      'LN_NET_EXTENDED_AMOUNT': '600',
      'LIST_G_LINES_ITEM_NUMBER': {
        'G_LINES_ITEM_NUMBER': {
          'ITEM_NUMBER': '136864001'
        }
      }
    },
    {
      'LN_LINE_NUMBER': '1',
      'LN_LINE_TYPE': 'Other Line',
      'LN_DESCRIPTION': 'Johar Town Lahore',
      'LN_UNIT_OF_MEASURE': 'EA',
      'LN_QUANTITY': '3',
      'LN_NET_SELLING_PRICE': '200',
      'LN_NET_EXTENDED_AMOUNT': '999',
      'LIST_G_LINES_ITEM_NUMBER': {
        'G_LINES_ITEM_NUMBER': {
          'ITEM_NUMBER': '99999999'
        }
      }
    }
  ],
  'TR_LN_AMOUNT': '600'
}

Desired Output

{
  'INVOICE_NUMBER': '31002',
  'TRANSACTION_CLASS': 'Invoice',
  'LN_LINE_NUMBER': '1',
  'LN_LINE_TYPE': 'Line',
  'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
  'LN_UNIT_OF_MEASURE': 'EA',
  'LN_QUANTITY': '3',
  'LN_NET_SELLING_PRICE': '200',
  'LN_NET_EXTENDED_AMOUNT': '600',
  'ITEM_NUMBER': '136864001',
  'TR_LN_AMOUNT': '600'
}
,
{
'INVOICE_NUMBER': '31002',
'TRANSACTION_CLASS': 'Invoice',
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Other Line',
'LN_DESCRIPTION': 'Johar Town Lahore',
'LN_UNIT_OF_MEASURE': 'EA',
'LN_QUANTITY': '3',
'LN_NET_SELLING_PRICE': '200',
'LN_NET_EXTENDED_AMOUNT': '999',
'ITEM_NUMBER': '99999999',
'TR_LN_AMOUNT': '600'
}
C.Nivs
  • 12,353
  • 2
  • 19
  • 44
noobie-php
  • 6,817
  • 15
  • 54
  • 101

1 Answer

1

You can try this script; it will flatten `mydict` into the desired form:

# Sample input: a transaction header with two line items under 'G_LINES'.
mydict = {
    "G_TRANSACTIONS": {
        "INVOICE_NUMBER": "31002",
        "TRANSACTION_CLASS": "Invoice",
        "LIST_G_LINES": {
            "G_LINES": [
                {
                    "LN_LINE_NUMBER": "1",
                    "LN_LINE_TYPE": "Line",
                    "LN_DESCRIPTION": "PKG980N-MAIN STREET 16C POCKET PLUGS",
                    "LN_UNIT_OF_MEASURE": "EA",
                    "LN_QUANTITY": "3",
                    "LN_NET_SELLING_PRICE": "200",
                    "LN_NET_EXTENDED_AMOUNT": "600",
                    "LIST_G_LINES_ITEM_NUMBER": {
                        "G_LINES_ITEM_NUMBER": {
                            "ITEM_NUMBER": "136864001",
                        },
                    },
                },
                {
                    "LN_LINE_NUMBER": "1",
                    "LN_LINE_TYPE": "Other Line",
                    "LN_DESCRIPTION": "Johar Town Lahore",
                    "LN_UNIT_OF_MEASURE": "EA",
                    "LN_QUANTITY": "3",
                    "LN_NET_SELLING_PRICE": "200",
                    "LN_NET_EXTENDED_AMOUNT": "999",
                    "LIST_G_LINES_ITEM_NUMBER": {
                        "G_LINES_ITEM_NUMBER": {
                            "ITEM_NUMBER": "99999999",
                        },
                    },
                },
            ],
        },
        "TR_LN_AMOUNT": "600",
    },
}


def flatten(d, depth=0):
    """Yield ``(record, depth)`` pairs of progressively flattened records.

    For a dict, scalar values are written into every record collected so
    far at this level. Each nested dict or list is flattened recursively,
    and every record it yields is merged onto a copy of the record that was
    last in the result list *before* that child was visited.

    For a list, each element becomes exactly one record tagged
    ``depth + 1`` that holds the merged fields of everything yielded for
    that element.

    Intermediate (partial) records are yielded as well, so a caller has to
    select the records it wants by depth, e.g. those with the greatest
    depth value.

    Args:
        d: A dict or a list to flatten. Any other value contributes no
            fields and yields a single empty record.
        depth: Depth tag assigned to this level's own record; 0 for the
            top-level call.

    Yields:
        ``(dict, int)`` tuples: a flat record and its depth tag.
    """
    rv = [({}, depth)]
    if isinstance(d, dict):
        for k, v in d.items():
            if not isinstance(v, (dict, list)):
                # A scalar belongs to every record gathered at this level.
                for record, _ in rv:
                    record[k] = v
            else:
                # Seed each child record from the state *before* this child
                # was visited. Seeding from the previously appended record
                # (rv[-1] inside the loop) leaks keys of an earlier sibling
                # record into later ones that do not define them.
                base = rv[-1][0]
                for child, child_depth in flatten(v, depth + 1):
                    merged = base.copy()
                    merged.update(child)
                    rv.append((merged, child_depth))
    elif isinstance(d, list):
        for v in d:
            # Every list element starts from an empty record so fields of
            # the previous element cannot carry over into this one.
            merged = {}
            for child, _ in flatten(v, depth + 1):
                merged.update(child)
            rv.append((merged, depth + 1))
    yield from rv

from itertools import groupby
from pprint import pprint

# flatten() yields (record, depth) pairs. Order them deepest-first (the sort
# is stable, so records of equal depth keep their yield order) and keep only
# the first depth group, i.e. the records carrying the greatest depth.
deepest_first = sorted(flatten(mydict), key=lambda pair: pair[1], reverse=True)

out = []
for _, same_depth in groupby(deepest_first, key=lambda pair: pair[1]):
    out.extend(record for record, _ in same_depth)
    break  # every later group holds shallower records

separator = '-' * 80
for row in out:
    pprint(row)
    print(separator)

Prints:

{'INVOICE_NUMBER': '31002',
 'ITEM_NUMBER': '136864001',
 'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
 'LN_LINE_NUMBER': '1',
 'LN_LINE_TYPE': 'Line',
 'LN_NET_EXTENDED_AMOUNT': '600',
 'LN_NET_SELLING_PRICE': '200',
 'LN_QUANTITY': '3',
 'LN_UNIT_OF_MEASURE': 'EA',
 'TRANSACTION_CLASS': 'Invoice',
 'TR_LN_AMOUNT': '600'}
--------------------------------------------------------------------------------
{'INVOICE_NUMBER': '31002',
 'ITEM_NUMBER': '99999999',
 'LN_DESCRIPTION': 'Johar Town Lahore',
 'LN_LINE_NUMBER': '1',
 'LN_LINE_TYPE': 'Other Line',
 'LN_NET_EXTENDED_AMOUNT': '999',
 'LN_NET_SELLING_PRICE': '200',
 'LN_QUANTITY': '3',
 'LN_UNIT_OF_MEASURE': 'EA',
 'TRANSACTION_CLASS': 'Invoice',
 'TR_LN_AMOUNT': '600'}
--------------------------------------------------------------------------------
Andrej Kesely
  • 168,389
  • 15
  • 48
  • 91