4

I have two graphs that are effectively trees (i.e. single root, no loops). They are minimally different: one of them has a leaf that the other doesn't. Is there a way to use Gremlin or potentially other graph query language, such as Cypher, in order to return a graph that represents the difference between these two trees?

DOT example:

graph A {
  a -- b -- c;
}

graph B {
  a -- b -- c;
  b -- d;
}

graph C = graph B - graph A : // <-- How do I do that?

graph C {
  b -- d;
}
  • What level of differences do you want this to work with? Do you want to ignore properties on nodes/relationships? Do you need to account for relationship types, or are they to be ignored? The nodes themselves between the graphs are not the same nodes, it seems, so how are they to be identified? By their labels/types? – InverseFalcon Nov 02 '18 at 23:36

1 Answers1

3

The following junit test is a simple viable solution for this problem. See the comments for the algorithm. There are much better ways to do this, but this is just to show that there is a solution.

package com.graph.test;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;

import org.apache.tinkerpop.gremlin.structure.Direction;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph;
import org.junit.Test;

/**
 * https://stackoverflow.com/questions/38786038/is-it-possible-to-calculate-difference-between-tree-graphs-in-gremlin
 * https://stackoverflow.com/questions/16553343/diff-for-directed-acyclic-graphs
 * @author wz
 *
 * Algorithm:
 *    Step one: Intersect all vertices and edges of a and b
 *    Step two: Remove all edges that are the same in a and b
 *    Step tree: Remove all non connected vertices
 *    
 *    Step three is not implemented here and one and two are very inefficient. This
 *    is just to show the principle and have a viable solution
 *
 */
public class TestGraphDiff {

  public static final String SORTKEY="sortKey";
  public static final String LINK="link";

  public Graph getGraph() {
    Graph g = TinkerGraph.open();
    Vertex a = g.addVertex("a");
    Vertex b = g.addVertex("b");
    Vertex c = g.addVertex("c");
    b.addEdge(LINK,a);
    c.addEdge(LINK, b);
    return g;
  }

  @Test
  public void testGraphDiff() {
    Graph A=getGraph();
    Graph B=getGraph();
    Vertex d = B.addVertex("d");
    Vertex b= B.traversal().V().hasLabel("b").next();
    d.addEdge(LINK, b);
    Graph C=diff(A,B);
    // there should be one edge left
    assertEquals(1,C.traversal().E().count().next().longValue());
    // there should be two vertices left
    assertEquals(2,C.traversal().V().count().next().longValue());
    // the left over edge should be "b-d"
    assertEquals("b-d",C.traversal().E().next().property(SORTKEY).value().toString());

  }

  /**
   * add a sort key to the given edge
   * @param e
   * @return - the sort key
   */
  private String addSortKey(Edge e) {
    String sortKey=e.inVertex().label()+"-"+e.outVertex().label();
    e.property(SORTKEY,sortKey);
    return sortKey;
  }

  /**
   * get the difference of two Graphs
   * @param a
   * @param b
   * @return
   */
  public Graph diff(Graph a, Graph b) {
    // step one: intersect both graphs
    Graph c= TinkerGraph.open();
    // copy all vertices of a 
    a.traversal().V().forEachRemaining(v->c.addVertex(v.label()));
    // copy all edges of a
    a.traversal().E().forEachRemaining(e->{
      Vertex iva = e.inVertex();
      Vertex ova = e.outVertex();
      Vertex ivc=c.traversal().V().hasLabel(iva.label()).next();
      Vertex ovc=c.traversal().V().hasLabel(ova.label()).next();
      addSortKey(e);
      Edge edge=ovc.addEdge(e.label(), ivc);   
      addSortKey(edge);
    });
    // copy all vertices of b that are not yet in c
    b.traversal().V().forEachRemaining(v->{
     if (c.traversal().V().hasLabel(v.label()).count().next().longValue()==0) {
       c.addVertex(v.label());
     }
     // copy all edges of b that are not yet in c
     b.traversal().E().forEachRemaining(e->{
       Vertex ivb = e.inVertex();
       Vertex ovb = e.outVertex();
       Vertex ivc=c.traversal().V().hasLabel(ivb.label()).next();
       Vertex ovc=c.traversal().V().hasLabel(ovb.label()).next();
       String sortKey=addSortKey(e);
       if (!c.traversal().E().has(SORTKEY,sortKey).hasNext()) {
         Edge edge = ovc.addEdge(e.label(), ivc);
         addSortKey(edge);
       }
     });
    });
    // step two: remove all edges that are "the same" in both graphs
    // inefficient version - would be much quicker with sorted lists
    // of edges ...
    c.traversal().E().forEachRemaining(e->{
      String sortKey=e.property(SORTKEY).value().toString();
      if (a.traversal().E().has(SORTKEY, sortKey).hasNext() &&
          c.traversal().E().has(SORTKEY, sortKey).hasNext()) {
        e.remove();
      }
    });
    // step three remove all "unconnected" vertices
    c.traversal().V().forEachRemaining(v->{
      if (!v.edges(Direction.BOTH).hasNext())
        v.remove();
    });
    return c;
  }

}
Werner Zimni
  • 108
  • 5