I'm assuming the missing functions are:
double px(point& p) { return p.get<0>(); }
double py(point& p) { return p.get<1>(); }
double nx(Node* n) { return px(n->pos); }
double ny(Node* n) { return py(n->pos); }
The first real issue I see is:
pts.insert(make_pair(*root, 0));
That inserts a copy of the object pointed to by root
. Any references will not point to the copy. root
is always leaked for no reason.
This is a better start that might actually work until the tree invalidates references:
pts.insert(std::make_pair(Node{nullptr, start, 0}, 0));
Node const* last = &pts.begin()->first;
Same here:
pts.insert(std::make_pair(*pnew, id));
last = pnew;
*pnew
is always leaked, and you didn't want last
to point to it. To "fix" it naievely, I'd use a helper function:
auto insert = [&pts](Node n) -> Node const* {
pts.insert(std::make_pair(n, n.id));
auto it = std::find_if(pts.begin(), pts.end(),
[](ptval const& p) { return p.second == n.id; });
return &it->first;
};
Node const* last = insert(Node{nullptr, start, 0});
As others have noted, this is definitely wrong:
ptval res;
pts.query(bgi::nearest(newpos, 1), &res);
Node* pnearest = &(res[0].first);
This by definition takes the address of the local copy of a ptval
's Node element. Instead, let's use the same naive (probably enormously inefficient) approach:
auto find = [&pts](int id) -> Node const* {
auto it = std::find_if(pts.begin(), pts.end(),
[id](ptval const& p) { return p.second == id; });
return &it->first;
};
auto insert = [&, find](Node n) -> Node const* {
pts.insert(std::make_pair(n, n.id));
return find(n.id);
};
Now you can have:
Node const* last = insert(Node{nullptr, start, 0}); // insert root
As well as:
ptval res;
pts.query(bgi::nearest(newpos, 1), &res);
Node new_node{find(res.second), newpos, id};
if (id == marker) {
marker_parent = new_node.parent;
}
parents.push_back(new_node.parent->id);
Next up
srand(time(nullptr)); // seed rng
// generate random pos
double xr = ((double)rand() / RAND_MAX) * 200;
double yr = ((double)rand() / RAND_MAX) * 200;
Prefer standard library:
std::mt19937 prng{std::random_device{}()};
std::uniform_real_distribution<double> dist(0, 200);
// generate random pos
double xr = dist(prng);
double yr = dist(prng);
Next up
Node* save;
Prefer to initialize:
Node* save = nullptr;
Know your loops.
int c = 1; // running count
// ...
while (c < 2000) {
//...
c++;
}
Should just be
for (int id = 1; id<2000; ++id) {
//...
}
Note the naming as well.
Don't use using namespace std;
(Why is "using namespace std;" considered bad practice?)
Applying all this leads me to:
Live On Compiler Explorer
//#include <algorithm>
#include <iostream>
//#include <cmath>
//#include <stdlib.h>
//#include <stdio.h>
//#include <string>
//#include <vector>
//#include <set>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/box.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/geometries/register/point.hpp>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <random>
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
struct Node {
Node const* parent = nullptr;
point pos = {};
int id = -1;
};
typedef std::pair<Node, int> ptval;
BOOST_GEOMETRY_REGISTER_POINT_2D_GET_SET(Node, double, bg::cs::cartesian,
pos.get<0>, pos.get<1>, pos.set<0>,
pos.set<1>)
double px(point const& p) { return p.get<0>(); }
double py(point const& p) { return p.get<1>(); }
double nx(Node const* n) { return px(n->pos); }
double ny(Node const* n) { return py(n->pos); }
int main(int argc, char** argv) {
point start(0, 0) /*, end(200, 200)*/;
bgi::rtree<ptval, bgi::quadratic<16>> pts;
auto find = [&pts](int id) -> Node const* {
auto it = std::find_if(pts.begin(), pts.end(),
[id](ptval const& p) { return p.second == id; });
return &it->first;
};
auto insert = [&, find](Node n) -> Node const* {
pts.insert(std::make_pair(n, n.id));
return find(n.id);
};
Node const* last = insert(Node{nullptr, start, 0}); // insert root
std::mt19937 prng{argc > 1 ? atoi(argv[1]) : std::random_device{}()};
std::uniform_real_distribution<double> dist(0, 200);
std::vector<int> parents{-1}; // actual parent ids
Node const* marker_parent = nullptr;
int marker = 500;
for (int id = 1; id < 2000; ++id) {
// generate random pos
point newpos(dist(prng), dist(prng));
ptval res;
pts.query(bgi::nearest(newpos, 1), &res);
Node new_node{find(res.second), newpos, id};
if (id == marker) {
marker_parent = new_node.parent;
}
parents.push_back(new_node.parent->id);
if (id > marker && id < marker + 50 &&
marker_parent->id != parents[marker]) {
std::cout << "CHANGED " << id << " " << parents[marker] << " "
<< marker_parent->id << " " << marker_parent
<< " " << nx(marker_parent) << " "
<< ny(marker_parent) << std::endl;
}
last = insert(new_node);
}
std::cout << "ULTIMATE " << parents[marker] << " ";
if (marker_parent)
std::cout << marker_parent->id << std::endl;
else
std::cout << std::endl;
}
Prints (with the seed fixed at 42 for online demo):
ULTIMATE 307 1622
The funny thing is, as far as I can see last
is never used. So you can probably do with much simpler and more efficient:
auto insert = [&](Node n) { pts.insert(std::make_pair(n, n.id)); };
Isolating The Bug - Root Cause
Note above when I said "might actually work until the tree invalidates references". Sadly I was unable to find any documentation of reference stability/invalidation guarantees for rtree
¹.
With different seeds we still get output like e.g. for seed 47:
CHANGED 501 189 38 0x617000004e30 26.0044 140.042
...
CHANGED 549 189 38 0x617000004e30 26.0044 140.042
ULTIMATE 189 474
Obviously, at least all the lines are identical (except for the running counter 501..549). Comparing seed 40:
CHANGED 517 108 500 0x560957ad1638 109.71 53.341
...
CHANGED 549 108 500 0x560957ad1638 109.71 53.341
ULTIMATE 108 590
Illustrates that sometimes the marker "randomly" changes during the game. The problem then, clearly, is that rtree
can reallocate, invalidating Node
references.
The number 517 is interesting to me for its relation to 16
in bgi::quadratic<16>
. So, I thought to make a much more granular test instead of randomly monitoring the 500th "marker". Why not monitor all parents?
constexpr auto N = 2000;
std::vector<Node const*> parents(N, nullptr); // parents refs
std::vector<int> parent_ids(N, -1); // parent ids
for (int id = 1; id < N; ++id) {
// generate random pos
point newpos(dist(prng), dist(prng));
ptval res;
pts.query(bgi::nearest(newpos, 1), &res);
Node new_node{find(res.second), newpos, id};
parents[id] = new_node.parent;
parent_ids[id] = new_node.parent->id;
bool invalidated = !std::equal(
parents.begin(), parents.end(), parent_ids.begin(),
[](Node const* p, int id) { return !p || p->id == id; });
std::cout << "invalidated: " << std::boolalpha << invalidated << "\n";
//assert(!invalidated);
insert(new_node);
}
Now running this a few hundred times with random seeds shows a pattern of always invalidating after 16 nodes:
$ for a in {1..100}; do ./build/sotest; done | uniq -c | sort | uniq
16 invalidated: false
1983 invalidated: true
1 ULTIMATE 101 1393
1 ULTIMATE 103 262
1 ULTIMATE 103 5
1 ULTIMATE 104 536
1 ULTIMATE 108 1406
1 ULTIMATE 111 111
This confirms beyond any statistical doubt that reallocation is causing the references (i.e. the pointers) to become invalidated.
FIXING
I suggest keeping the parent references by id
only. Interestingly, since id
is already part of Node
you can do without the duplication in std::pair
by using a custom ``indexable<>or custom
IndexableGetter` template argument.
Let's store references to Node
s in the tree, instead, using a custom indexable:
using NodeRef = std::reference_wrapper<Node const>;
struct MyIndexable {
using value_type = NodeRef;
using result_type = Node const&;
result_type operator()(NodeRef r) const { return r; }
};
Now we can trivially build a tree from those:
bgi::rtree<NodeRef, bgi::quadratic<16>, MyIndexable> pts;
All we need is some stable storage for the referenced nodes:
std::deque<Node> storage; // reference stability adding/removing at either end
//std::list<Node> storage; // iterator and reference stability (except removed)
auto insert = [&](Node n) {
storage.push_back(std::move(n));
pts.insert(NodeRef(storage.back()));
};
The rest of the program can remain pretty much the same - except for a few respellings to accomodate NodeRef
:
Live On Coliru
#undef NDEBUG
#include <iostream>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/box.hpp>
#include <boost/geometry/geometries/point.hpp>
#include <boost/geometry/geometries/register/point.hpp>
#include <boost/geometry/geometry.hpp>
#include <boost/geometry/index/rtree.hpp>
#include <random>
namespace bg = boost::geometry;
namespace bgi = boost::geometry::index;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
struct Node {
Node const* parent = nullptr;
point pos = {};
int id = -1;
};
BOOST_GEOMETRY_REGISTER_POINT_2D_GET_SET(Node, double, bg::cs::cartesian,
pos.get<0>, pos.get<1>, pos.set<0>,
pos.set<1>)
double px(point const& p) { return p.get<0>(); }
double py(point const& p) { return p.get<1>(); }
double nx(Node const* n) { return px(n->pos); }
double ny(Node const* n) { return py(n->pos); }
using NodeRef = std::reference_wrapper<Node const>;
struct MyIndexable {
using value_type = NodeRef;
using result_type = Node const&;
result_type operator()(NodeRef r) const { return r; }
};
int main(int argc, char** argv) {
bgi::rtree<NodeRef, bgi::quadratic<16>, MyIndexable> pts;
std::deque<Node> storage; // reference stability adding/removing at either end
//std::list<Node> storage; // iterator and reference stability (except removed)
auto insert = [&](Node n) {
storage.push_back(std::move(n));
pts.insert(NodeRef(storage.back()));
};
insert({nullptr, point(0, 0), 0}); // insert root
std::mt19937 prng{argc > 1 ? atoi(argv[1]) : std::random_device{}()};
std::uniform_real_distribution<double> dist(0, 200);
constexpr auto N = 2000;
std::vector<NodeRef> parents; // parents refs
std::vector<int> parent_ids; // parent ids
for (int id = 1; id < N; ++id) {
// generate random pos
point newpos(dist(prng), dist(prng));
std::vector<NodeRef> res;
pts.query(bgi::nearest(newpos, 1), back_inserter(res));
Node new_node{&res.front().get(), newpos, id};
parents.push_back(*new_node.parent);
parent_ids.push_back(new_node.parent->id);
bool invalidated =
!std::equal(parents.begin(), parents.end(), parent_ids.begin(),
[](Node const& p, int id) { return p.id == id; });
assert(!invalidated);
insert(std::move(new_node));
}
std::cout << "ULTIMATE " << parent_ids.at(500) << " "
<< parents.at(500).get().id << "\n";
}
Which never trips the assert, and produces stable output like:
for a in {1..50}; do ./a.out; done
ULTIMATE 41 41
ULTIMATE 365 365
ULTIMATE 219 219
ULTIMATE 193 193
ULTIMATE 448 448
ULTIMATE 331 331
ULTIMATE 227 227
ULTIMATE 234 234
ULTIMATE 300 300
ULTIMATE 227 227
ULTIMATE 243 243
ULTIMATE 248 248
ULTIMATE 233 233
ULTIMATE 143 143
ULTIMATE 39 39
ULTIMATE 488 488
ULTIMATE 493 493
ULTIMATE 9 9
ULTIMATE 212 212
ULTIMATE 338 338
ULTIMATE 141 141
ULTIMATE 356 356
ULTIMATE 147 147
ULTIMATE 376 376
ULTIMATE 76 76
ULTIMATE 450 450
ULTIMATE 272 272
ULTIMATE 34 34
ULTIMATE 492 492
ULTIMATE 478 478
ULTIMATE 84 84
ULTIMATE 416 416
ULTIMATE 222 222
ULTIMATE 457 457
ULTIMATE 95 95
ULTIMATE 446 446
ULTIMATE 233 233
ULTIMATE 480 480
ULTIMATE 265 265
ULTIMATE 415 415
ULTIMATE 289 289
ULTIMATE 121 121
ULTIMATE 344 344
ULTIMATE 110 110
ULTIMATE 429 429
ULTIMATE 31 31
ULTIMATE 344 344
ULTIMATE 172 172
ULTIMATE 20 20
ULTIMATE 394 394
Bonus
A version without the now-redundant sanity checks and unused code: Live On Coliru
¹ although there are some notes about iterator invalidation