So, I have implemented a path overlap on a graph with a custom-made A*, using boost::graph
, but all developed from scratch.
What is a path overlap is simple. Given a graph of letters ('A'-> 'C'
, ...), and a word HELLO, I need to find the best overlapping starting from 'H'
in the graph, ending in 'O'
(if it exists, or end when the path length is reached). The "best" refers to the way candidate nodes are chosen, simply exploring the letter that is nearer to the current one in the word. The graph weights are therefore dynamic, they depend on the input word. See the following example:
/*
* B H
* / \ /
* A---D---F---G---I
* \ /\
* C E
*
* input: A E I O U
* result: A D F G I
*/
What I've done is simple: using a DFS that tracks nodes and inserts them in a priority queue. Obviously, this isn't quite optimal: I'd very much like to use boost::depth_first_search
.
My problem is simple: all the examples use a graph traversal with no initial node. The graph is very large, like 8M nodes minimum, hoping to get up to 100M nodes, on a cluster. So I am aiming to distribute the graph over a cluster with Parallel BGL.
What to you suggest?
Probably my from scratch approach isn't really the best one, it would be easier to use BGL, but although I succeeded in trying a very basic astar_search
, I am failing to try a simple depth_first_search
. I am unable to understand color maps and how I can set the destination node, so I will post my code, hoping someone will point me in the right direction.
Note again, I need to move the code, when it will work, to Parallel BGL.
Thanks!
#include <iostream>
#include <unordered_map>
#include <vector>
#include <string>
#include <utility>
#include <list>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/breadth_first_search.hpp>
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/astar_search.hpp>
// Funky
std::unordered_map<char, int> letters;
auto getletter = [](std::size_t i) -> char { return 'A' + i; };
typedef boost::adjacency_list<boost::vecS, // V
boost::vecS, // E
boost::bidirectionalS,
boost::no_property,
int // Ignore weights for now
> graph;
typedef boost::graph_traits<graph>::edge_iterator edge_iterator;
// Used to end the A*-like search algorithm
struct end_astar { };
// Custom A* visitor
template <class Vertex>
class bestmatch_visitor : public boost::default_dfs_visitor //public boost::default_astar_visitor
{
public:
// Constructor
bestmatch_visitor(Vertex goal) : destination_(goal) { };
// ==============
// == VERTICES ==
// ==============
template<class Graph>
void initialize_vertex(Vertex u, const Graph &g)
{
std::cout << "init " << u << " " << getletter(u) << std::endl;
}
// Push a new vertex in the stack
template<class Graph>
void discover_vertex(Vertex u, const Graph &g)
{
std::cout << "discover " << u << " " << getletter(u) << std::endl;
}
// Pop a vertex and examine it
template <class Graph>
void examine_vertex(Vertex u, const Graph& g)
{
std::cout << ">>>>> examine " << u << " " << getletter(u) << std::endl;
if (u == destination_) throw end_astar();
}
// Vertex examined, moving to the blacklist
template <class Graph>
void finish_vertex(Vertex u, const Graph& g)
{
std::cout << "vertex examined, closing " << u << " " << getletter(u) << std::endl;
}
// ===========
// == EDGES ==
// ===========
// Examine out edges
template <class Edge, class Graph>
void examine_edge(Edge e, const Graph& g)
{
std::cout << "--> edge examine " << e << std::endl;
}
// Relax edge if distance from goal reduces
template <class Edge, class Graph>
void edge_relaxed(Edge e, const Graph& g)
{
std::cout << "--> edge relax " << e << std::endl;
}
// Distance from goal does not decrease: don't relax
template <class Edge, class Graph>
void edge_not_relaxed(Edge e, const Graph& g)
{
std::cout << "--> edge NOT relaxed " << e << std::endl;
}
// Ooops! Already checked this one
template <class Edge, class Graph>
void black_target(Edge e, const Graph& g)
{
std::cout << "--> BLACKLISTED " << e << std::endl;
}
private:
Vertex destination_;
};
int main()
{
/*
* B H
* / \ /
* A---D---F---G---I
* \ /\
* C E
*
* input: A Z I M U T
* result: A C D F G I
*
* nodes: ABCDEFGHI
* codes: 012345678
*/
for (int i = 'A'; i <= 'I'; i++) letters[i] = i - 'A';
graph g;
boost::add_edge(letters['A'], letters['B'], g);
boost::add_edge(letters['A'], letters['C'], g);
boost::add_edge(letters['A'], letters['D'], g);
boost::add_edge(letters['B'], letters['D'], g);
boost::add_edge(letters['C'], letters['D'], g);
boost::add_edge(letters['D'], letters['E'], g);
boost::add_edge(letters['D'], letters['F'], g);
boost::add_edge(letters['F'], letters['G'], g);
boost::add_edge(letters['G'], letters['H'], g);
boost::add_edge(letters['G'], letters['I'], g);
edge_iterator ei, ee;
for (boost::tie(ei, ee) = boost::edges(g); ei != ee; ei++)
{
graph::edge_descriptor d = *ei;
std::cout << getletter(boost::source(*ei, g)) << " -> " <<
getletter(boost::target(*ei, g)) << " " << std::endl; //<< g[d] << std::endl;
}
// Predecessors
std::vector<graph::vertex_descriptor> p(boost::num_vertices(g));
// Distances
std::vector<graph::vertex_descriptor> d(boost::num_vertices(g));
// Color: HOW?
//typename boost::property_map<graph, boost::vertex_color_t>::type c;
// Start and finish
int start = letters['A'];
int goal = letters['I'];
bestmatch_visitor<int> v(letters['A']);
// Useless:
boost::depth_first_search(g, boost::visitor(v));
// HOW?
//boost::depth_first_visit(g, letters['A'], boost::visitor(v), c);
}