This solution might be a bit shorter. Unfortunately, it needs to create a discrete_distribution<>
object in every step, which might be prohibitive when drawing a lot of samples.
#include <iostream>
#include <boost/random/discrete_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 };
discrete_distribution<> dist(w);
int n = 10;
boost::random::mt19937 gen;
std::vector<int> samples;
for (auto i = 0; i < n; i++) {
samples.push_back(dist(gen));
w[*samples.rbegin()] = 0;
dist = discrete_distribution<>(w);
}
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}
Improved answer:
After carefully looking for a similar question on this site (Faster weighted sampling without replacement), I found a stunningly simple algorithm for weighted sampling without replacement, it is just a bit complicated to implement in C++. Note, that this is not the most efficient algorithm, but it seems to me the simplest one to implement.
In https://doi.org/10.1016/j.ipl.2005.11.003 the method is described in detail.
Especially, it is not efficient if the sample size is much smaller than the basic population.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 10 };
uniform_01<> dist;
boost::random::mt19937 gen;
std::vector<double> vals;
std::generate_n(std::back_inserter(vals), w.size(), [&dist,&gen]() { return dist(gen); });
std::transform(vals.begin(), vals.end(), w.begin(), vals.begin(), [&](auto r, auto w) { return std::pow(r, 1. / w); });
std::vector<std::pair<double, int>> valIndices;
size_t index = 0;
std::transform(vals.begin(), vals.end(), std::back_inserter(valIndices), [&index](auto v) { return std::pair<double,size_t>(v,index++); });
std::sort(valIndices.begin(), valIndices.end(), [](auto x, auto y) { return x.first > y.first; });
std::vector<int> samples;
std::transform(valIndices.begin(), valIndices.end(), std::back_inserter(samples), [](auto v) { return v.second; });
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}
Easier answer
I just removed some of the STL functions and replaced it with simple for loops.
#include <iostream>
#include <iterator>
#include <boost/random/uniform_01.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <algorithm>
using namespace boost::random;
int main(int, char**) {
std::vector<double> w = { 2, 2, 1, 1, 2, 2, 1, 1, 2, 1000 };
uniform_01<> dist;
boost::random::mt19937 gen(342575235);
std::vector<double> vals;
for (auto iter : w) {
vals.push_back(std::pow(dist(gen), 1. / iter));
}
// Sorting vals, but retain the indices.
// There is unfortunately no easy way to do this with STL.
std::vector<std::pair<int, double>> valsWithIndices;
for (size_t iter = 0; iter < vals.size(); iter++) {
valsWithIndices.emplace_back(iter, vals[iter]);
}
std::sort(valsWithIndices.begin(), valsWithIndices.end(), [](auto x, auto y) {return x.second > y.second; });
std::vector<size_t> samples;
int sampleSize = 8;
for (auto iter = 0; iter < sampleSize; iter++) {
samples.push_back(valsWithIndices[iter].first);
}
for (auto iter : samples) {
std::cout << iter << " ";
}
return 0;
}