It's not clear from the question what precisely your requirement is, but based on your implementation it's just a set where only iteration is in sorted order (it's still not semantically ordered, e.g. you shouldn't be able to access elements positionally and comparison wouldn't consider order, and insertion order doesn't matter). In that case I'd implement the existing MutableSet
abstract base class and compose around a vanilla set:
from collections.abc import MutableSet
from typing import Any, Iterable, Iterator, Protocol, TypeVar
class Sortable(Protocol):
def __lt__(self, other) -> bool: ...
T = TypeVar("T", bound=Sortable)
class OrderedSet(MutableSet[T]):
_values: set[T]
def __init__(self, values: Iterable[T] = None, /) -> None:
self._values = set() if values is None else set(values)
def add(self, value: T) -> None:
self._values.add(value)
def discard(self, value: T) -> None:
self._values.discard(value)
def __contains__(self, item: Any) -> bool:
return item in self._values
def __iter__(self) -> Iterator[T]:
return iter(sorted(self._values))
def __len__(self) -> int:
return len(self._values)
def __repr__(self) -> str:
return f"{self.__class__.__name__}({list(self)!r})"
The only custom behaviour is the use of sorted
in the __iter__
implementation. These are the tests I drove it out with:
from unittest import TestCase
from ordered_set import OrderedSet
class TestOrderedSet(TestCase):
def test_contains(self):
self.assertNotIn("foo", OrderedSet())
self.assertIn("foo", OrderedSet(["foo", "bar", "baz"]))
def test_iteration(self):
os = OrderedSet(("foo", "bar"))
os.add("baz")
self.assertEqual(
list(os),
["bar", "baz", "foo"]
)
def test_size(self):
self.assertEqual(len(OrderedSet()), 0)
self.assertEqual(len(OrderedSet(["foo", "bar", "baz"])), 3)
def test_addition(self):
os = OrderedSet()
for value in ["foo", "bar", "baz"]:
os.add(value)
self.assertEqual(len(os), 3)
self.assertIn("bar", os)
def test_removal(self):
os = OrderedSet(["foo", "bar", "baz"])
os.remove("baz")
self.assertEqual(len(os), 2)
self.assertNotIn("baz", os)
def test_rejects_keyword_args(self):
with self.assertRaises(TypeError):
OrderedSet(values=["foo", "bar", "baz"])
def test_repr(self):
self.assertEqual(
repr(OrderedSet([3, 1, 2])),
"OrderedSet([1, 2, 3])",
)
I implement MutableSet
rather than extending UserDict
because:
it has better semantics, what we're trying to create is a mutable set;
it keeps you compliant with the Liskov substitution principle, meaning an OrderedSet
can be used anywhere any other Set
* or MutableSet
can:
- your implementation can be
add
ed to but lacks the removal methods: clear
, discard
, pop
, remove
;
- your implementation also lacks the "magic" methods that allow e.g.:
>>> OrderedSet((1, 3)) | {2, 4}
OrderedSet([1, 2, 3, 4])
it gives us more freedom in implementation. For example, if sorting the set for every iteration is a crucial performance bottleneck, we can keep the same tests and interface and swap over to a bisected list:
from bisect import bisect_left
from collections.abc import MutableSet
from typing import Any, Iterable, Iterator, Protocol, TypeVar
class Sortable(Protocol):
def __lt__(self, other) -> bool: ...
T = TypeVar("T", bound=Sortable)
class OrderedSet(MutableSet[T]):
_values: list[T]
def __init__(self, values: Iterable[T] = None, /) -> None:
self._values = [] if values is None else sorted(values)
def add(self, value: T) -> None:
index = bisect_left(self._values, value)
if index == len(self) or self._values[index] != value:
self._values.insert(index, value)
def discard(self, value: T) -> None:
index = bisect_left(self._values, value)
if index < len(self) and self._values[index] == value:
self._values.pop(index)
def __contains__(self, value: Any) -> bool:
index = bisect_left(self._values, value)
return index < len(self) and self._values[index] == value
def __iter__(self) -> Iterator[T]:
return iter(self._values)
def __len__(self) -> int:
return len(self._values)
def __repr__(self) -> str:
return f"{self.__class__.__name__}({self._values!r})"
* but not where any set
can, see e.g. Why are set methods like .intersection() not supported on set-like objects?