For absolute pure, unadulterated speed and boundless efficiency, the kinds of which could even cause the likes of Chuck Norris to take pause and helplessly look on in awe, I humbly recommend this remarkably well planned-out approach with __dict__
:
def dict(self):
    """Return a shallow copy of the instance ``__dict__`` with ``message_id`` as a string.

    The instance is not modified; only the returned mapping carries the
    ``str()`` form of ``message_id``. Raises ``KeyError`` when the instance
    has no ``message_id`` entry in its ``__dict__``.
    """
    attrs = self.__dict__
    # Overriding an existing key while unpacking keeps its original position.
    return {**attrs, 'message_id': str(attrs['message_id'])}
For a class that defines a __slots__
attribute, such as with @dataclass(slots=True)
, the above approach most likely won't work, as the __dict__
attribute won't be available on class instances. In that case, a highly efficient "shoot for the moon" approach such as below could instead be viable:
def dict(self):
    """Return the slot values of ``self`` as a dict, with ``message_id`` stringified.

    Intended as a method on a class that defines ``__slots__``. The first
    call for a given class generates a specialised function with ``exec``,
    installs it as ``dict`` on that class, and delegates to it, so later calls
    skip the code generation entirely.

    Raises:
        AttributeError: if no class in the MRO of ``type(self)`` declares
            ``__slots__``.
    """

    def build(cls):
        """Compile the specialised function for ``cls``, install and return it."""
        mro = cls.__mro__
        if not any('__slots__' in vars(klass) for klass in mro):
            raise AttributeError(
                f"{cls.__name__!r} object has no attribute '__slots__'")

        # ``cls.__slots__`` only lists slots declared on the most derived
        # class, so walk the MRO (bases first) to pick up inherited ones too.
        names = []
        for klass in reversed(mro):
            declared = vars(klass).get('__slots__', ())
            if isinstance(declared, str):
                declared = (declared,)
            for name in declared:
                # ``__dict__`` / ``__weakref__`` are bookkeeping slots, not data.
                if name not in ('__dict__', '__weakref__') and name not in names:
                    names.append(name)

        entries = ','.join(
            f'{name!r}:str(self.{name})' if name == 'message_id'
            else f'{name!r}:self.{name}'
            for name in names)
        # The type guard covers subclasses that inherit this generated
        # function: they get their own version built on first use instead of
        # silently losing the slots they add.
        source = (
            'def dict(self):\n'
            ' if type(self) is not _cls:\n'
            '  return _build(type(self))(self)\n'
            f' return {{{entries}}}'
        )
        namespace = {}
        # Use a minimal globals mapping: passing ``locals()`` here would make
        # the generated function keep the first instance alive forever.
        exec(source, {'_cls': cls, '_build': build}, namespace)
        generated = namespace['dict']
        cls.dict = generated
        return generated

    return build(type(self))(self)
In case anyone's teetering at the edge of their seats right now (I know, this is really incredible, breakthrough-level stuff) - I've added my personal timings via the timeit
module below, which should hopefully shed a little more light on the performance aspect of things.
FYI, the approaches with pure __dict__
are inevitably much faster than dataclasses.asdict()
.
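Note: Even though __dict__
works better in this particular case, dataclasses.asdict()
will likely be better for composite objects, such as ones with nested dataclasses, or values with mutable types such as dict
or list
. This is because asdict() recurses into nested dataclasses and copies container values, whereas __dict__.copy() is only a shallow copy that shares those values with the original instance.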
from dataclasses import dataclass, asdict, field
from uuid import UUID, uuid4
class DictMixin:
    """Mixin class to add a `dict()` method on classes that define a __slots__ attribute.

    The method is generated lazily: the first ``dict()`` call for a class
    compiles a specialised function listing that class's slots and installs it
    on the class, so subsequent calls run the generated code directly.
    """

    # The mixin must be slot-only itself: a base class without ``__slots__``
    # would give every subclass instance a ``__dict__`` again.
    __slots__ = ()

    def dict(self):
        """Return the slot values of ``self`` as a dict, with ``message_id`` stringified.

        Raises:
            AttributeError: if no class other than this mixin in the MRO of
                ``type(self)`` declares ``__slots__``.
        """

        def build(cls):
            """Compile the specialised function for ``cls``, install and return it."""
            mro = cls.__mro__
            if not any('__slots__' in vars(klass)
                       for klass in mro if klass is not DictMixin):
                raise AttributeError(
                    f"{cls.__name__!r} object has no attribute '__slots__'")

            # ``cls.__slots__`` only lists slots declared on the most derived
            # class, so walk the MRO (bases first) to include inherited ones.
            names = []
            for klass in reversed(mro):
                declared = vars(klass).get('__slots__', ())
                if isinstance(declared, str):
                    declared = (declared,)
                for name in declared:
                    # ``__dict__`` / ``__weakref__`` are bookkeeping, not data.
                    if name not in ('__dict__', '__weakref__') and name not in names:
                        names.append(name)

            entries = ','.join(
                f'{name!r}:str(self.{name})' if name == 'message_id'
                else f'{name!r}:self.{name}'
                for name in names)
            # The type guard covers subclasses that inherit this generated
            # function: they get their own version built on first use instead
            # of silently losing the slots they add.
            source = (
                'def dict(self):\n'
                ' if type(self) is not _cls:\n'
                '  return _build(type(self))(self)\n'
                f' return {{{entries}}}'
            )
            namespace = {}
            # Use a minimal globals mapping: passing ``locals()`` here would
            # make the generated function keep the first instance alive.
            exec(source, {'_cls': cls, '_build': build}, namespace)
            generated = namespace['dict']
            cls.dict = generated
            return generated

        return build(type(self))(self)
@dataclass
class MessageHeader:
    """Regular (``__dict__``-backed) dataclass used to compare dict-conversion approaches.

    All three ``dictN`` methods return the same mapping of field names to
    values, with ``message_id`` converted to ``str``; they differ only in how
    the mapping is produced.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict1(self):
        """Shallow-copy ``__dict__`` and replace ``message_id`` in the copy."""
        snapshot = self.__dict__.copy()
        snapshot.update(message_id=str(snapshot['message_id']))
        return snapshot

    def dict2(self):
        """Build the mapping with a comprehension over ``__dict__``."""
        return {
            name: (value if name != 'message_id' else str(value))
            for name, value in self.__dict__.items()
        }

    def dict3(self):
        """Build the mapping with a comprehension over ``dataclasses.asdict()``."""
        return {
            name: (value if name != 'message_id' else str(value))
            for name, value in asdict(self).items()
        }
@dataclass(slots=True)
class MessageHeaderWithSlots(DictMixin):
    """Slots-based counterpart of the message header.

    ``dict()`` is provided by ``DictMixin``; ``dict2()`` is the
    ``dataclasses.asdict()`` baseline it is compared against.
    """

    message_id: UUID = field(default_factory=uuid4)
    string: str = 'a string'
    integer: int = 1000
    floating: float = 1.0

    def dict2(self):
        """Build the mapping via ``asdict()``, converting ``message_id`` to ``str``."""
        return {
            name: (value if name != 'message_id' else str(value))
            for name, value in asdict(self).items()
        }
if __name__ == '__main__':
    # Micro-benchmark of every conversion approach, followed by sanity checks
    # that all approaches produce the same mapping.
    from timeit import timeit

    header = MessageHeader()
    header_with_slots = MessageHeaderWithSlots()

    n = 10000

    # (label, statement) pairs, timed in the order listed.
    benchmarks = (
        ('dict1(): ', 'header.dict1()'),
        ('dict2(): ', 'header.dict2()'),
        ('dict3(): ', 'header.dict3()'),
        ('slots -> dict(): ', 'header_with_slots.dict()'),
        ('slots -> dict2(): ', 'header_with_slots.dict2()'),
    )
    for label, stmt in benchmarks:
        print(label, timeit(stmt, number=n, globals=globals()))

    print()

    via_dict = header.dict1()
    print(via_dict)

    via_asdict = header.dict3()
    print(via_asdict)

    # The id must have been stringified while other values keep their type.
    assert isinstance(via_dict['message_id'], str)
    assert isinstance(via_dict['integer'], int)

    # Every approach must agree on the resulting mapping.
    assert header.dict1() == header.dict2() == header.dict3()
    assert header_with_slots.dict() == header_with_slots.dict2()
Results on my Mac M1 laptop:
dict1(): 0.005992999998852611
dict2(): 0.00800508284009993
dict3(): 0.07069579092785716
slots -> dict(): 0.00583599996753037
slots -> dict2(): 0.07395245810039341
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
{'message_id': 'b4e17ef9-1a58-4007-9cef-39158b094da2', 'string': 'a string', 'integer': 1000, 'floating': 1.0}
Note: For a more "complete" implementation of DictMixin
(named as SerializableMixin
), check out a related answer I had also added.