3

I have the following Filer entity (in the domain-driven design sense).

from dataclasses import dataclass, field

@dataclass
class Address:
    """A postal address made up of four string components."""
    street: str
    city: str
    state: str  # the tests pass "AB" — presumably a two-letter state code; confirm
    zipcode: str  # held as a string; the tests pass "12345"

@dataclass(unsafe_hash=True)
class Filer:
    """An entity whose identity is its ``cik``.

    Hashing uses ``cik`` only (every other field is declared with
    ``hash=False``), while ``==`` compares every field. Two versions of the
    same filer therefore hash alike but compare unequal when any detail
    has changed.

    ``unsafe_hash=True`` is required: a plain ``@dataclass`` (``eq=True``,
    ``frozen=False``) sets ``__hash__`` to ``None``, which makes instances
    unhashable and leaves the per-field ``hash=`` settings without effect.
    Because the class stays mutable, do not reassign ``cik`` while an
    instance is stored in a set or used as a dict key.
    """
    cik: int  # identity; the only field that feeds __hash__
    name: str = field(hash=False, compare=True)
    state: str = field(hash=False, compare=True)
    yearend: str = field(hash=False, compare=True)
    businessaddress: Address = field(hash=False, compare=True)
    mailingaddress: Address = field(hash=False, compare=True)
    sic: int = field(hash=False, compare=True)
    # Optional: stays None when no value is supplied.
    ein: str = field(hash=False, compare=True, default=None)

For any Filer, cik by itself determines identity. I would, however, like to use equality comparisons to see if any other details about the Filer may have changed (e.g. vs. a prior version of the same Filer). On that basis, I set hash=False, compare=True on all fields except cik (for which hash=True, by default).

The following testcase gives a quick overview of the intended behavior:

  • Identity: fully determined by cik and tested via assertIs and assertIsNot
  • Equality: determined by all fields and tested via assertEqual and assertNotEqual
import unittest

class TestFiler(unittest.TestCase):
    """Checks equality (``==``) and identity (``is``) between Filer instances."""
    
    def test_equality_same_filer(self,):
        # Two separately constructed Filers with identical field values
        # must compare equal.
        a = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertEqual(a, b)

    def test_identity_same_filer(self,):
        # NOTE: this fails. assertIs checks `a is b`, and `a` and `b` are two
        # separately constructed objects even though every field matches.
        a = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertIs(a, b)

    def test_equality_same_filer_new_name(self,):
        # Same cik, different name: the two versions must compare unequal.
        a = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertNotEqual(a, b)

    def test_identity_same_filer_new_name(self,):
        # NOTE: this fails for the same reason as test_identity_same_filer:
        # sharing a cik does not make two objects the same object.
        a = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "A new name for the company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertIs(a, b)

    def test_equality_different_filer_same_details(self,):
        # Only cik differs (4321 vs 1234); cik takes part in ==, so unequal.
        a = Filer(4321, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertNotEqual(a, b)

    def test_identity_different_filer_same_details(self,):
        # Passes, but only because `a` and `b` are distinct objects; the
        # differing cik plays no part in an `is` check.
        a = Filer(4321, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        b = Filer(1234, "Some company", "Some state", "0930",
                         Address("Some address", "Some city", "AB", "12345"),
                         Address("Some address", "Some city", "AB", "12345"),
                         1000,
                         1234567)
        self.assertIsNot(a, b)

# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()

The results didn't go as intended.

(base) randm@pearljam /home/randm/Projects/secfilings $ /home/randm/Libraries/anaconda3/bin/python /home/randm/Projects/scrap/filer.py
....FF
======================================================================
FAIL: test_identity_same_filer (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 51, in test_identity_same_filer
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

======================================================================
FAIL: test_identity_same_filer_new_name (__main__.TestFiler)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/randm/Projects/scrap/filer.py", line 77, in test_identity_same_filer_new_name
    self.assertIs(a, b)
AssertionError: Filer(cik=1234, name='Some company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567) is not Filer(cik=1234, name='A new name for the company', state='Some state', yearend='0930', businessaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), mailingaddress=Address(street='Some address', city='Some city', state='AB', zipcode='12345'), sic=1000, ein=1234567)

----------------------------------------------------------------------
Ran 6 tests in 0.001s

FAILED (failures=2)

Is there a way for me to use `is` identity tests (without resorting to a dataclass method is_ or something like that, which would change the clean syntax I'm looking for in client code)? Or am I simply misusing identity (which I believe is based on pointer values in CPython) and should instead be using hash equality explicitly in my client code?

Nimantha
  • 6,405
  • 6
  • 28
  • 69
MikeRand
  • 4,788
  • 9
  • 41
  • 70

2 Answers

2

There's no way to override Python's `is` identity check, as @LhasaDad pointed out: it is true only when both operands are the very same object. (It can appear to work for some strings and small integers because CPython caches them, but that is an implementation detail and behaves "unexpectedly" in general.)

You could use unsafe_hash=True (with eq=True) in the dataclass definitions so that you can use hash(a) == hash(b). But then you may as well create a method is_ and do a.is_(b) if you want it to feel more natural. Be aware that if there are other users/coders of your classes, you need to be clear about when is_ can be True while == is False, and about all the other combinations.

# unsafe_hash=True makes dataclass generate __hash__ from the fields whose
# hash= is not disabled (here: cik only), even though the class is mutable.
@dataclass(unsafe_hash=True)
class Filer:
    ...  # everything else the same

Then your identity test for that would be based on hash().

Also, you should be using test setUp for a and b instead of copy-pasting them in every test. Someone who reads your code (like us) still has to check the entire definition of both in each test, to see what's different. And in a month, so will you. For objects which are only slightly different for your tests, use dataclasses.replace().

Here's a more readable version of the unit tests, with the hash-based checks added in:

import dataclasses
import unittest

class TestFiler(unittest.TestCase):
    """Equality, identity and hash checks for ``Filer``.

    The hash tests assume ``Filer`` is declared with
    ``@dataclass(unsafe_hash=True)``; without it ``hash()`` raises
    ``TypeError`` because the instances are unhashable.
    """

    def setUp(self):
        # `a` and `b` are separately constructed but hold identical details.
        self.a = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)
        self.b = Filer(1234, "Some company", "Some state", "0930",
                       Address("Some address", "Some city", "AB", "12345"),
                       Address("Some address", "Some city", "AB", "12345"),
                       1000, 1234567)

    def test_equality_same_filer(self):
        self.assertEqual(self.a, self.b)

    def test_identity_same_filer(self):  # will still fail
        # `is` compares object identity; `a` and `b` are distinct objects.
        self.assertIs(self.a, self.b)

    def test_equality_same_filer_new_name(self):
        # make it clear that `a` and `c` only differ by name:
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertNotEqual(self.a, c)

    def test_identity_same_filer_new_name(self):  # will still fail
        # or put c also in `setUp`
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertIs(self.a, c)

    def test_equality_different_filer_same_details(self):
        # Equality check: only cik differs, so the two must compare unequal.
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(new_a, self.a)

    def test_identity_different_filer_same_details(self):
        new_a = dataclasses.replace(self.a, cik=4321)
        self.assertIsNot(new_a, self.a)

    def test_hash_same_filer(self):  # NEW
        self.assertEqual(hash(self.a), hash(self.b))

    def test_hash_same_filer_new_name(self):  # NEW
        # Only cik feeds the hash, so a renamed filer hashes the same.
        c = dataclasses.replace(self.a, name="A new name for the company")
        self.assertEqual(hash(c), hash(self.a))

    # Needs its own name: reusing test_identity_different_filer_same_details
    # would silently replace the identity test defined above.
    def test_hash_different_filer_same_details(self):  # NEW
        diff_a = dataclasses.replace(self.a, cik=4321)
        self.assertNotEqual(hash(diff_a), hash(self.a))


# Run the test suite only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
aneroid
  • 12,983
  • 3
  • 36
  • 66
1

You are misusing assertIs. It uses the `is` behavior of Python; that is, both operands must refer to the same object. Since you have constructed two different objects, the `is` test between them will always be False. assertEqual is the correct test for equivalence.

LhasaDad
  • 1,786
  • 1
  • 12
  • 19