You'd use an interval container that uses optimized storage and algorithms.
In this example using Boost ICL, I have made some "arbitrary" choices to generate nice disjunct TileSets
:
using TileSets = icl::split_interval_set<int>;
struct TileSet : TileSets::interval_type::type {
TileSet(int b, int e) : TileSets::interval_type(closed(b, e)) {}
};
struct Tile : TileSets::interval_type::type {
Tile(int id) : TileSets::interval_type(closed(id, id)) {}
};
The beauty is the high-level coding:
Live On Coliru
TileSets gen_tiles (size_t n = 100000);
TileSets gen_tilesets(size_t n = (2ull << 8) + 1);
#include <iostream>
int main() {
auto const tiles = gen_tiles (10);
auto const ts = gen_tilesets(30);
std::cout << ts << "\n----\n";
for (auto hit : tiles & ts) {
std::cout << hit.lower() << " hits in tileset " << *ts.find(hit) << "\n";
}
}
Prints
{[8,71)[71,133)[133,206)[206,231)[231,465)[465,467)[467,565](565,581)[581,651](651,907)[907,1000)[1000,1395](1395,1429)[1429,1560](1560,1706)[1706,1819)[1819,1835)[1835,1997)[1997,2124](2124,2328)[2328,2913)[2913,2922)[2922,3043)[3043,3338)[3338,3664](3664,3825](3825,3999)[3999,4320](4320,4506](4506,4561](4561,4593](4593,4668)[4668,5143)[5143,5248](5248,5633)[5633,5925](5925,6012](6012,6076)[6076,6117](6117,6119](6119,6175](6175,6184)[6184,6509)[6509,6804](6804,7081](7081,7220)[7220,7852](7852,8325)[8325,8600](8600,8662](8662,9386](9386,9423)[9423,9489](9489,9657](9657,9700](9700,9738](9738,9833](9833,9923]}
----
1561 hits in tileset (1560,1706)
1835 hits in tileset [1835,1997)
3746 hits in tileset (3664,3825]
4459 hits in tileset (4320,4506]
5969 hits in tileset (5925,6012]
5987 hits in tileset (5925,6012]
7320 hits in tileset [7220,7852]
7797 hits in tileset [7220,7852]
7966 hits in tileset (7852,8325)
9508 hits in tileset (9489,9657]
PERFORMANCE
When run with the default sizes (100000 tiles in 2^8+1 tilesets) it takes 0.034s on my box
$ time ./test | tee >(echo "total lines: $(wc -l)") | tail
9987 hits in tileset (9984,9990]
9988 hits in tileset (9984,9990]
9989 hits in tileset (9984,9990]
9990 hits in tileset (9984,9990]
9991 hits in tileset (9990,9995]
9992 hits in tileset (9990,9995]
9993 hits in tileset (9990,9995]
9994 hits in tileset (9990,9995]
9995 hits in tileset (9990,9995]
total lines: 9988
real 0m0.034s
user 0m0.029s
sys 0m0.008s
Live On Coliru that runs in 0.064s. That includes the time taken for output, which does redundant lookups (ts.find(hit)
)!
UPDATE - Higher Volumes
More performance testing with higher volumes and more specific timing output:
Live On Coliru
#include <boost/icl/interval_set.hpp>
#include <boost/icl/split_interval_set.hpp>
namespace icl = boost::icl;
using TileSets = icl::split_interval_set<int>;
struct TileSet : TileSets::interval_type::type {
TileSet(int b, int e) : TileSets::interval_type(closed(b, e)) {}
};
struct Tile : TileSets::interval_type::type {
Tile(int id) : TileSets::interval_type(id) {}
};
TileSets gen_tiles (size_t n = (1ull << 22));
TileSets gen_tilesets(size_t n = (1ull << 12));
#include <iostream>
#include <iomanip>
#include <boost/chrono/chrono_io.hpp>
template <typename F>
auto timed(char const* task, F&& f) {
using namespace boost::chrono;
struct _ {
high_resolution_clock::time_point s;
const char* task;
~_() { std::cout << " -- (" << task << " completed in " << duration_cast<milliseconds>(high_resolution_clock::now() - s) << ")\n"; }
} timing { high_resolution_clock::now(), task };
return f();
}
int main() {
auto const tiles = timed("Generate tiles", [] { return gen_tiles(); });
auto const ts = timed("Generate tile sets", [] { return gen_tilesets(); });
//std::cout << ts << "\n----\n";
std::cout << "Random tiles generated: " << tiles.iterative_size() << " across a domain of " << std::setprecision(2) << static_cast<double>(tiles.size()) << "\n";
std::cout << "Tilesets to match against: " << ts.iterative_size() << " across a domain of " << std::setprecision(2) << static_cast<double>(tiles.size()) << "\n";
timed("Query intersection", [&] { std::cout << "Total number of hits: " << (tiles & ts).iterative_size() << "\n"; });
timed("Query difference", [&] { std::cout << "Total number of misses: " << (tiles - ts).iterative_size() << "\n"; });
//for (auto hit : tiles & ts) {
//std::cout << hit.lower() << " hits in tileset " << *ts.find(hit) << "\n";
//}
}
#include <random>
static auto gen_tile_id = [prng=std::mt19937{42}, dist=std::uniform_int_distribution<>()] () mutable
{ return dist(prng); };
TileSets gen_tiles(size_t n) {
TileSets r;
std::generate_n(icl::inserter(r, r.end()), n, [] () -> Tile { return gen_tile_id(); });
return r;
}
TileSets gen_tilesets(size_t n) {
TileSets r;
std::generate_n(icl::inserter(r, r.end()), n, [] () -> TileSet {
auto b = gen_tile_id(), e = gen_tile_id();
return { std::min(b,e), std::max(b,e) };
});
return r;
}
Prints (on my box):
-- (Generate tiles completed in 3773 milliseconds)
-- (Generate tile sets completed in 152 milliseconds)
Random tiles generated: 4190133 across a domain of 4.2e+06
Tilesets to match against: 8191 across a domain of 4.2e+06
Total number of hits: 4187624
-- (Query intersection completed in 1068 milliseconds)
Total number of misses: 2509
-- (Query difference completed in 533 milliseconds)