I've written a simple (-ish) test for the approach I mentioned in the comments of the OP. It's obviously not battle-tested, but the idea is represented - you should be able to take it from here.
Since the amount of bytes read is so much smaller than if one were to serialize the entire engine, the performance of the two approaches might actually be comparable. Testing this hypothesis, as well as further optimization, are left as an exercise for the reader.
#include <iostream>
#include <random>
#include <chrono>
#include <cstdint>
#include <fstream>
using namespace std;
struct rng_wrap
{
// it would also be advisable to somehow
// store what kind of RNG this is,
// so we don't deserialize an mt19937
// as a linear congruential or something,
// but this example only covers mt19937
uint64_t seed;
uint64_t invoke_count;
mt19937 rng;
typedef mt19937::result_type result_type;
rng_wrap(uint64_t _seed) :
seed(_seed),
invoke_count(0),
rng(_seed)
{}
rng_wrap(istream& in) {
in.read(reinterpret_cast<char*>(&seed), sizeof(seed));
in.read(reinterpret_cast<char*>(&invoke_count), sizeof(invoke_count));
rng = mt19937(seed);
rng.discard(invoke_count);
}
void discard(unsigned long long z) {
rng.discard(z);
invoke_count += z;
}
result_type operator()() {
++invoke_count;
return rng();
}
static constexpr result_type min() {
return mt19937::min();
}
static constexpr result_type max() {
return mt19937::max();
}
};
ostream& operator<<(ostream& out, rng_wrap& wrap)
{
out.write(reinterpret_cast<char*>(&(wrap.seed)), sizeof(wrap.seed));
out.write(reinterpret_cast<char*>(&(wrap.invoke_count)), sizeof(wrap.invoke_count));
return out;
}
istream& operator>>(istream& in, rng_wrap& wrap)
{
wrap = rng_wrap(in);
return in;
}
void test(rng_wrap& rngw, int count, bool quiet=false)
{
uniform_int_distribution<int> integers(0, 9);
uniform_real_distribution<double> doubles(0, 1);
normal_distribution<double> stdnorm(0, 1);
if (quiet) {
for (int i = 0; i < count; ++i)
integers(rngw);
for (int i = 0; i < count; ++i)
doubles(rngw);
for (int i = 0; i < count; ++i)
stdnorm(rngw);
} else {
cout << "Integers:\n";
for (int i = 0; i < count; ++i)
cout << integers(rngw) << " ";
cout << "\n\nDoubles:\n";
for (int i = 0; i < count; ++i)
cout << doubles(rngw) << " ";
cout << "\n\nNormal variates:\n";
for (int i = 0; i < count; ++i)
cout << stdnorm(rngw) << " ";
cout << "\n\n\n";
}
}
int main(int argc, char** argv)
{
rng_wrap rngw(123456790ull);
test(rngw, 10, true); // this is just so we don't start with a "fresh" rng
uint64_t seed1 = rngw.seed;
uint64_t invoke_count1 = rngw.invoke_count;
ofstream outfile("rng", ios::binary);
outfile << rngw;
outfile.close();
cout << "Test 1:\n";
test(rngw, 10); // test 1
ifstream infile("rng", ios::binary);
infile >> rngw;
infile.close();
cout << "Test 2:\n";
test(rngw, 10); // test 2 - should be identical to 1
return 0;
}