I am writing a quiz/teaching game in HTML5/JS in which the player is offered a set of 10 questions from a larger mastery set. The game keeps track of the players' scores over time and is more likely to choose questions from the question list that the player is having trouble with.
To construct the probability distribution list, I employ the alias method as below, which selects an item in O(1) time while completely adhering to distribution:
function generate_random_question_selector() {
// Generates a random selector function using the Alias Method
// for discrete probability distributions (see
// https://en.wikipedia.org/wiki/Alias_method for an explanation)
var i = 0;
var probabilities = [], aliases = [];
var probSum = 0;
/* ... Business logic to fill probabilities array ... */
// Normalize all probabilities to average to 1
// and categorize each probability as to where it fits
// in that scale
var probMultiplier = probabilities.length / probSum;
var overFull = [], underFull = [];
probabilities = probabilities.map(function(p, i) {
var newP = p * probMultiplier;
if (newP > 1) overFull.push(i);
else if (newP < 1) underFull.push(i);
else if (newP !== 1) {
throw "Non-numerical value got into scores";
}
return newP;
});
overFull.sort();
underFull.sort();
// Process both queues by having each under-full entry
// have the rest of its space occupied by the fullest
// over-full entry, re-categorizing the over-full entry
// as needed
while (overFull.length > 0 || underFull.length > 0) {
if (!(overFull.length > 0 && underFull.length > 0)) {
// only reached due to rounding errors.
// Just assign all the remaining probabilities to 1
var notEmptyArray = overFull.length > 0 ? overFull : underFull;
notEmptyArray.forEach(function(index) {
probabilities[index] = 1;
});
break; // get out of the while loop
}
aliases[underFull[0]] = overFull[0];
probabilities[overFull[0]] += probabilities[underFull[0]] - 1;
underFull.shift();
if (probabilities[overFull[0]] > 1) overFull.push(overFull.shift());
else if (probabilities[overFull[0]] < 1) underFull.push(overFull.shift());
else overFull.shift();
}
return function() {
var index = Math.floor(Math.random() * probabilities.length);
return Math.random() < probabilities[index] ? index : aliases[index];
}
}
This method works really well, but part of my business specifications are that the questions do not repeat. I currently use a naive reroll technique to accomplish this, but it's obvious that this will break if less than 10 items are far more probable than the rest of the items:
var selectQuestion = generate_random_question_selector();
var questionSet = [];
for (var i = 0; i < num_questions; i++) {
var question_num;
do {
question_num = selectQuestion();
} while (questionSet.indexOf(question_num) >= 0)
questionSet.push(question_num);
}
What can be done to or about this method that would allow me to efficiently sample the questions without replacement?