9

I have two classes A and B, each one storing references to objects of the other class in lists:

class A:
    """Named object that keeps a list of the B objects registered with it."""

    def __init__(self, name):
        """Store `name` and start with an empty `my_Bs` list."""
        self.name = name
        self.my_Bs = list()

    def registerB(self, b):
        """Add `b` at the end of this object's `my_Bs` list."""
        self.my_Bs += [b]

class B:
    """Named object that keeps a list of the A objects registered with it."""

    def __init__(self, name):
        """Store `name` and start with an empty `my_As` list."""
        self.name = name
        self.my_As = list()

    def registerA(self, a):
        """Add `a` at the end of this object's `my_As` list."""
        self.my_As += [a]

Now, my app builds two lists, one of objects of A, one of objects of B, having cross references.

# a list of As, a list of Bs
list_of_As = [A('firstA'), A('secondA')]
list_of_Bs = [B('firstB'), B('secondB')]
# example of one cross-reference:
# firstA stores secondB and secondB stores firstA, so the two objects
# reference each other
list_of_As[0].registerB(list_of_Bs[1])
list_of_Bs[1].registerA(list_of_As[0])

Obviously, if I call json.dumps() on either list_of_..., I get a circular reference error.

What I want to do to circumvent that issue is to dump JSON containing lists of the elements' name attributes instead of lists of the objects themselves:

# This is what I want to obtain for
# the JSON for list_of_As
[
    {'name' : 'firstA', 'my_Bs': ['secondB']},
    {'name' : 'secondA', 'my_Bs': []}
]

The only way I can think of is to maintain in each class an additional list of strings (respectively my_Bs_names and my_As_names) and to use JSONEncoder as follows:

class MyEncoder(json.JSONEncoder):
    """JSON encoder that leaves out the cross-reference lists of A and B.

    An A instance is encoded as its attribute dictionary without the
    'my_Bs' entry, a B instance as its attribute dictionary without the
    'my_As' entry.  Every other object is handed to the base class, which
    raises TypeError for types json cannot serialize.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for `obj`."""
        # isinstance() needs the class objects themselves; passing the
        # strings 'A' / 'B' raises TypeError.
        if isinstance(obj, A):
            return {  # filter out the list of B objects
                k: v for k, v in obj.__dict__.items() if k != 'my_Bs'
            }
        if isinstance(obj, B):
            return {  # filter out the list of A objects
                k: v for k, v in obj.__dict__.items() if k != 'my_As'
            }
        return super(MyEncoder, self).default(obj)

# Use the custom encoder to dump JSON for list_of_As
# (print is called as a function so the line runs on Python 2 and Python 3)
print(json.dumps(list_of_As, cls=MyEncoder))

If I am not mistaken, I would get the following result:

# This is what I obtain for
# the JSON for list_of_As with the code above
[
    {'name' : 'firstA', 'my_Bs_names': ['secondB']},
    {'name' : 'secondA', 'my_Bs_names': []}
]

Is there a more elegant way of getting this result? For instance one that does not require any additional lists of strings?

rpanai
  • 12,515
  • 2
  • 42
  • 64
Silverspur
  • 891
  • 1
  • 12
  • 33

4 Answers4

6

General JSONEncoder class that prevents circular reference error

The following encoder class MyEncoder recursively encodes the nested objects until it meets an object it has already encoded (a circular reference); for such an object, its "name" attribute is returned instead of the object itself.

import json
class MyEncoder(json.JSONEncoder):
    """JSON encoder that replaces already-encoded A/B objects by their name.

    The first time an A or B instance is met it is encoded through its
    attribute dictionary; every later occurrence of the same instance is
    encoded as its ``name`` attribute only, which breaks reference cycles.
    """

    def __init__(self, *args, **argv):
        super().__init__(*args, **argv)
        # id() values of the objects already expanded by this encoder.
        # A set of ids gives constant-time membership tests (a list needs a
        # linear scan per lookup) and compares by identity.
        self.proc_objs = set()

    def default(self, obj):
        """Return a JSON-serializable stand-in for `obj`.

        Raises:
            TypeError: if `obj` is neither an A nor a B instance.
        """
        if isinstance(obj, (A, B)):
            if id(obj) in self.proc_objs:
                return obj.name  # short-circuit the object dumping
            self.proc_objs.add(id(obj))
            return obj.__dict__
        # Returning obj unchanged would make the encoder call default() on
        # the same object again; let the base class raise TypeError instead.
        return super().default(obj)

# check_circular=False switches off json's built-in circular-reference check,
# leaving the handling of repeated objects to MyEncoder.default().
json.dumps(list_of_As, cls=MyEncoder, check_circular=False, indent=2)

Output:

[
  { "name": "firstA",
    "my_Bs": [
      { "name": "secondB",
        "my_As": [ "firstA" ]
      }
    ]
  },
  { "name": "secondA", "my_Bs": [] }
]

Using a custom toJSON method

You can implement a serializer method in your classes.

class JSONable:
    """Mixin that builds a dictionary view of an instance for JSON dumping."""

    def toJSON(self):
        """Return a new dict built from the instance attributes.

        A list-valued attribute (such as "my_As" or "my_Bs") is replaced by
        the list of the ``name`` attributes of its elements; any other value
        is copied over unchanged.
        """
        return {
            attr: ([item.name for item in value]
                   if isinstance(value, list) else value)
            for attr, value in self.__dict__.items()
        }

class A(JSONable):
    """Named object holding the B objects registered with it."""

    def __init__(self, name):
        self.name = name
        self.my_Bs = []

    def register(self, b):
        """Append `b` to this object's `my_Bs` list."""
        self.my_Bs.append(b)

    # Alias so that code calling registerB(), the method name used by the
    # plain (non-JSONable) version of this class, keeps working.
    registerB = register

class B(JSONable):
    """Named object holding the A objects registered with it."""

    def __init__(self, name):
        self.name = name
        self.my_As = []

    def register(self, a):
        """Append `a` to this object's `my_As` list."""
        self.my_As.append(a)

    # Alias so that code calling registerA(), the method name used by the
    # plain (non-JSONable) version of this class, keeps working.
    registerA = register

# default= is called for every object json cannot serialize by itself; here
# it delegates to the object's own toJSON() method.
json.dumps(list_of_As, default=lambda x: x.toJSON(), indent=2)

Output:

[
  { "name":  "firstA",  "my_Bs": [  "secondB" ] },
  { "name":  "secondA", "my_Bs": [] }
]
gdlmx
  • 6,479
  • 1
  • 21
  • 39
  • I’m not sure that calling your class “generic” is applicable, since your solution (like mine) explicitly tests for specific classes, and requires a `.name` attribute. Note that using a list to track already-seen objects means your solution will degrade quadratically as the number of objects to serialise increases (as `in` on a list takes O(N) time and you test objects N times). – Martijn Pieters Mar 07 '19 at 23:29
4

The best-practice approach is to record the id() values of objects already seen, when encoding. id() values are unique for objects with overlapping lifetimes, and when encoding, you can generally count on the objects not being short-lived. This works on any object type, and doesn't require the objects to be hashable.

Both the copy and pickle modules use this technique in a memo dictionary that maps id() values to their object for later reference.

You can use this technique here too; you really only need to keep a set of the ids to detect that you can return the .name attribute. Using a set makes testing for repeated references fast and efficient (membership testing takes O(1) constant time, as opposed to lists, which take O(N) linear time):

class CircularEncoder(json.JSONEncoder):
    """JSON encoder that emits an A/B object in full only once.

    The first occurrence of an A or B instance is encoded through its
    attribute dictionary; later occurrences of the same instance are
    encoded as its ``name`` attribute.
    """

    def __init__(self, *args, **kwargs):
        # Repeated objects are dealt with in default(), so the built-in
        # circular check is always switched off, whatever the caller passed.
        kwargs.update(check_circular=False)
        super(CircularEncoder, self).__init__(*args, **kwargs)
        # id() values of the A/B instances already expanded.
        self._memo = set()

    def default(self, obj):
        """Return a JSON-serializable stand-in for `obj`."""
        if not isinstance(obj, (A, B)):
            return super(CircularEncoder, self).default(obj)
        key = id(obj)
        if key not in self._memo:
            self._memo.add(key)
            return vars(obj)
        return obj.name

then use json.dumps() with this class:

# No check_circular argument is needed: CircularEncoder.__init__ sets it.
json.dumps(list_of_As, cls=CircularEncoder)

For your sample input, this produces:

>>> print(json.dumps(list_of_As, cls=CircularEncoder, indent=2))
[
  {
    "name": "firstA",
    "my_Bs": [
      {
        "name": "secondB",
        "my_As": [
          "firstA"
        ]
      }
    ]
  },
  {
    "name": "secondA",
    "my_Bs": []
  }
]
Martijn Pieters
  • 1,048,767
  • 296
  • 4,058
  • 3,343
0

How about this?

  • Classes like A and B only need to specify a class attribute (_deep_fields) listing their attributes that may lead to circular dependencies (need to be "shallow"-serialized)
  • They also need to inherit from ShallowSerializable which simply ignores attributes in _deep_fields if shallow is True
  • The encoder encodes all keys of the object, but calls make_shallow on all values, to make sure that shallow=True is sent to any objects that inherit from ShallowSerializable
  • The solution is generic, in the sense that any other class that needs to implement this behavior only needs to inherit from ShallowSerializable & define _deep_fields.
class ShallowSerializable(object):
    """Base class able to leave selected attributes out of its dict view."""

    # Names of the attributes omitted from shallow dictionaries; subclasses
    # override this set.
    _deep_fields = set()

    def get_dict(self, shallow=False):
        """Return a new dict of the instance attributes.

        When `shallow` is true, attributes named in `_deep_fields` are
        left out; otherwise every attribute is included.
        """
        if not shallow:
            return dict(self.__dict__)
        skipped = self._deep_fields
        return {
            key: value
            for key, value in self.__dict__.items()
            if key not in skipped
        }

class A(ShallowSerializable):
    """Named object holding the B objects registered with it."""

    # 'my_Bs' is the attribute left out when a shallow dict is requested.
    _deep_fields = {'my_Bs'}

    def __init__(self, name):
        self.name = name
        self.my_Bs = []

    # Indented with four spaces like the other methods; the stray fifth
    # space made the class fail with an IndentationError.
    def registerB(self, b):
        """Append `b` to this object's `my_Bs` list."""
        self.my_Bs.append(b)

class B(ShallowSerializable):
    """Named object holding the A objects registered with it."""

    # 'my_As' is the attribute left out when a shallow dict is requested.
    _deep_fields = {'my_As'}

    def __init__(self, name):
        """Store `name` and start with an empty `my_As` list."""
        self.name = name
        self.my_As = list()

    def registerA(self, a):
        """Add `a` at the end of this object's `my_As` list."""
        self.my_As += [a]


class MyEncoder(json.JSONEncoder):
    """JSON encoder that serializes nested ShallowSerializable objects shallowly.

    The object handed to default() is encoded with all of its attributes,
    but every ShallowSerializable found in the attribute values (directly
    or inside dicts and lists) is reduced to its shallow dictionary.
    """

    def make_shallow(self, obj):
        """Return `obj` with contained ShallowSerializable objects made shallow.

        Dicts and lists are rebuilt recursively; any other value is
        returned unchanged.
        """
        if isinstance(obj, ShallowSerializable):
            return obj.get_dict(shallow=True)
        elif isinstance(obj, dict):
            return {k: self.make_shallow(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self.make_shallow(x) for x in obj]
        else:
            return obj

    def default(self, obj):
        """Return a dict of `obj`'s attributes with shallow values.

        Raises:
            TypeError: if `obj` has no attribute dictionary (for example a
                set), as the base encoder does for unsupported types.
        """
        try:
            attributes = vars(obj)
        except TypeError:
            # No __dict__ to read: report the object as not serializable
            # through the base class rather than with an AttributeError.
            return super(MyEncoder, self).default(obj)
        return {
            k: self.make_shallow(v)
            for k, v in attributes.items()
        }

Usage:

# Build two As and two Bs.
list_of_As = [A('firstA'), A('secondA')]
list_of_Bs = [B('firstB'), B('secondB')]
# example of one cross-reference
list_of_As[0].registerB(list_of_Bs[1])
list_of_Bs[1].registerA(list_of_As[0])

# Each line starting with >>> below shows the string returned by the call
# just above it.
json.dumps(list_of_As, cls=MyEncoder)
>>> '[{"my_Bs": [{"name": "secondB"}], "name": "firstA"}, {"my_Bs": [], "name": "secondA"}]'

json.dumps(list_of_Bs, cls=MyEncoder)
>>> '[{"my_As": [], "name": "firstB"}, {"my_As": [{"name": "firstA"}], "name": "secondB"}]'
tomas
  • 963
  • 6
  • 19
-1

You can do it by changing the string representation of the objects through Python's magic methods (`__str__` and `__repr__`). This is how many libraries customise their console and string representations instead of showing the default form with the object's hexadecimal address.

Run Code Here

import json
class A:
    """Named object whose string form is a JSON document.

    The JSON contains the object's name and the names of the B objects
    registered with it.
    """

    def __init__(self, name):
        self.name = name
        self.my_Bs = []

    def registerB(self, b):
        """Append `b` to this object's `my_Bs` list."""
        self.my_Bs.append(b)

    def __str__(self):
        """Return the JSON text for this object, with Bs given by name."""
        b_names = [b.name for b in self.my_Bs]
        return json.dumps({"name": self.name, "my_Bs": b_names})

    # repr() gives the same text, so containers of A objects print as JSON too.
    __repr__ = __str__

class B:
    """Named object whose string form is a JSON document.

    The JSON contains the object's name and the names of the A objects
    registered with it.
    """

    def __init__(self, name):
        self.name = name
        self.my_As = []

    def registerA(self, a):
        """Append `a` to this object's `my_As` list."""
        self.my_As.append(a)

    def __str__(self):
        """Return the JSON text for this object, with As given by name."""
        _storage = {
            "name": self.name,
            # The key matches the attribute it is built from; it used to be
            # "my_Bs", copied over from class A.
            "my_As": [obj.name for obj in self.my_As],
        }
        return json.dumps(_storage)

    # repr() gives the same text, so containers of B objects print as JSON too.
    __repr__ = __str__


# a list of As, a list of Bs
list_of_As = [A('firstA'), A('secondA')]
list_of_Bs = [B('firstB'), B('secondB')]
# example of one cross-reference
list_of_As[0].registerB(list_of_Bs[1])
list_of_Bs[1].registerA(list_of_As[0])
str(list_of_As) # the list is rendered with each element's repr, i.e. its JSON text; no encoder class is involved

You can also simplify your code now, because this approach only changes how the objects are represented and does not need an extra encoder class.

Akash
  • 2,795
  • 1
  • 7
  • 11