Notice the following interaction with the interpreter:
>>> import sys
>>> array = ['this', 'is', 'a', 'string', 'array']
>>> sys.getsizeof(array)
56
>>> list(map(sys.getsizeof, array))
[29, 27, 26, 31, 30]
>>> sys.getsizeof(array) + sum(map(sys.getsizeof, array))
199
>>>
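For a flat list of strings like this one, sys.getsizeof(array) + sum(map(sys.getsizeof, array)) gives the total size, because sys.getsizeof(array) on its own counts only the list object and not the strings it references (the exact byte counts vary with the Python version and platform).
However, the following is a more complete implementation that takes into account object containers, class instances, and the use of __slots__.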
import sys
def sizeof(obj):
    """Return the approximate total size, in bytes, of *obj* and its contents.

    ``sys.getsizeof`` only reports the memory of the object itself, so this
    function also follows the references held by tuples, lists, sets,
    frozensets, dicts, instance ``__dict__`` attributes and ``__slots__``
    attributes. An object reachable along several paths is counted once.
    """
    return _sizeof(obj, set())


def _sizeof(obj, memo):
    """Return the size of *obj* plus everything it references.

    *memo* is a set of ``id()`` values for objects that have already been
    counted; it is updated in place. An object whose id is already in
    *memo* contributes 0.
    """
    # Add this object's size just once.
    location = id(obj)
    if location in memo:
        return 0
    memo.add(location)
    total = sys.getsizeof(obj)
    # Look for any class instance data.  The attribute dict is measured as
    # an object in its own right (its size is not part of getsizeof(obj)),
    # and *obj* is deliberately not rebound so that the container and
    # __slots__ checks below still see the original object.
    try:
        attributes = vars(obj)
    except TypeError:
        attributes = None
    if isinstance(attributes, dict):
        total += _sizeof(attributes, memo)
    # Handle containers holding objects.
    if isinstance(obj, (tuple, list, frozenset, set)):
        for item in obj:
            total += _sizeof(item, memo)
    # Handle the two-sided nature of dicts.
    elif isinstance(obj, dict):
        for key, value in obj.items():
            total += _sizeof(key, memo) + _sizeof(value, memo)
    # Handle class instances using __slots__.  Every class in the MRO may
    # declare its own slots, so look at each class rather than only at the
    # attribute visible on the instance.
    for klass in type(obj).__mro__:
        slots = vars(klass).get('__slots__', ())
        if isinstance(slots, str):
            # A bare string declares a single slot, not one per character.
            slots = (slots,)
        for name in slots:
            # Slots that were never assigned raise AttributeError; skip them.
            if hasattr(obj, name):
                total += _sizeof(name, memo) + _sizeof(getattr(obj, name), memo)
    return total
Edit:
After revisiting this problem some time later, the following alternative was devised. Please note that it iterates over every iterable it encounters, so it never finishes on an infinite iterator and it consumes one-shot iterators such as generators. This code is best for static data structures ready for analysis.
import sys
def sizeof(obj):
    """Return the summed ``sys.getsizeof`` of every object ``explore`` yields.

    ``explore`` is started with a fresh, empty memo set, so each call
    measures *obj* independently of earlier calls.
    """
    return sum(map(sys.getsizeof, explore(obj, set())))
def explore(obj, memo):
    """Yield *obj* and every object reachable from it, each one only once.

    Reachability is followed through ``__slots__`` attributes, the instance
    ``__dict__``, and the ``keys``/``values``/``__iter__`` methods of the
    object's type. *memo* is a set of ``id()`` values of objects already
    yielded; it is updated in place.

    Iterating is how contents are discovered, so one-shot iterators are
    consumed and infinite iterators never finish.
    """
    loc = id(obj)
    if loc in memo:
        return
    memo.add(loc)
    yield obj
    # Text and byte strings own their data directly.  Iterating them would
    # add unrelated one-element objects to the total, and for characters
    # that are created afresh on every iteration it would never terminate.
    if isinstance(obj, (str, bytes, bytearray)):
        return
    # Handle instances with slots.
    try:
        slots = obj.__slots__
    except AttributeError:
        pass
    else:
        if isinstance(slots, str):
            # A bare string declares a single slot, not one per character.
            slots = (slots,)
        for name in slots:
            try:
                attr = getattr(obj, name)
            except AttributeError:
                # Declared but never assigned.
                pass
            else:
                yield from explore(attr, memo)
    # Handle instances with dict.
    try:
        attrs = obj.__dict__
    except AttributeError:
        pass
    else:
        yield from explore(attrs, memo)
    # Handle dicts or iterables.  The methods are looked up on the type and
    # called with *obj* so that a class object found in the graph does not
    # have its unbound methods invoked without an instance.
    for name in 'keys', 'values', '__iter__':
        method = getattr(type(obj), name, None)
        if method is None:
            continue
        for item in method(obj):
            yield from explore(item, memo)