How do I transform a directed acyclic graph into a hash value such that any two isomorphic graphs hash to the same value? It is acceptable, but undesirable for two isomorphic graphs to hash to different values, which is what I have done in the code below. We can assume that the number of vertices in the graph is at most 11.
I am particularly interested in Python code.
Here is what I did. If self.lt
is a mapping from node to descendants (not children!), then I relabel the nodes according to a modified topological sort (that prefers to order elements with more descendants first if it can). Then, I hash the sorted dictionary. Some isomorphic graphs will hash to different values, especially as the number of nodes grows.
I have included all the code to motivate my use case. I am calculating the number of comparisons required to find the median of 7 numbers. The more that isomorphic graphs hash to the same value the less work that has to be redone. I considered putting larger connected components first, but didn't see how to do that quickly.
from tools.decorator import memoized # A standard memoization decorator
class Graph:
def __init__(self, n):
self.lt = {i: set() for i in range(n)}
def compared(self, i, j):
return j in self.lt[i] or i in self.lt[j]
def withedge(self, i, j):
retval = Graph(len(self.lt))
implied_lt = self.lt[j] | set([j])
for (s, lt_s), (k, lt_k) in zip(self.lt.items(),
retval.lt.items()):
lt_k |= lt_s
if i in lt_k or k == i:
lt_k |= implied_lt
return retval.toposort()
def toposort(self):
mapping = {}
while len(mapping) < len(self.lt):
for i, lt_i in self.lt.items():
if i in mapping:
continue
if any(i in lt_j or len(lt_i) < len(lt_j)
for j, lt_j in self.lt.items()
if j not in mapping):
continue
mapping[i] = len(mapping)
retval = Graph(0)
for i, lt_i in self.lt.items():
retval.lt[mapping[i]] = {mapping[j]
for j in lt_i}
return retval
def median_known(self):
n = len(self.lt)
for i, lt_i in self.lt.items():
if len(lt_i) != n // 2:
continue
if sum(1
for j, lt_j in self.lt.items()
if i in lt_j) == n // 2:
return True
return False
def __repr__(self):
return("[{}]".format(", ".join("{}: {{{}}}".format(
i,
", ".join(str(x) for x in lt_i))
for i, lt_i in self.lt.items())))
def hashkey(self):
return tuple(sorted({k: tuple(sorted(v))
for k, v in self.lt.items()}.items()))
def __hash__(self):
return hash(self.hashkey())
def __eq__(self, other):
return self.hashkey() == other.hashkey()
@memoized
def mincomps(g):
print("Calculating:", g)
if g.median_known():
return 0
nodes = g.lt.keys()
return 1 + min(max(mincomps(g.withedge(i, j)),
mincomps(g.withedge(j, i)))
for i in nodes
for j in nodes
if j > i and not g.compared(i, j))
g = Graph(7)
print(mincomps(g))