-3

I have converted this Java program into a C# program.

using System;
using System.Collections.Generic;

namespace RandomNumberWith_Distribution__Test
{
    public class DistributedRandomNumberGenerator
    {

        private Dictionary<Int32, Double> distribution;
        private double distSum;

        public DistributedRandomNumberGenerator()
        {
            distribution = new Dictionary<Int32, Double>();
        }

        public void addNumber(int val, double dist)
        {
            distribution.Add(val, dist);// are these two
            distSum += dist;            // lines correctly translated?
        }

        public int getDistributedRandomNumber()
        {
            double rand = new Random().NextDouble();//generate a double random number
            double ratio = 1.0f / distSum;//why is ratio needed?
            double tempDist = 0;

            foreach (Int32 i in distribution.Keys)
            {
                tempDist += distribution[i];

                if (rand / ratio <= tempDist)//what does "rand/ratio" signify? What does this comparison achieve?
                {
                    return i;
                }
            }
            return 0;
        }
    }

    public class MainClass
    {
        public static void Main(String[] args)
        {
            DistributedRandomNumberGenerator drng = new DistributedRandomNumberGenerator();
            drng.addNumber(1, 0.2d);
            drng.addNumber(2, 0.3d);
            drng.addNumber(3, 0.5d);

            //================= 
            // start simulation
            int testCount = 1000000;
            Dictionary<Int32, Double> test = new Dictionary<Int32, Double>();

            for (int i = 0; i < testCount; i++)
            {
                int random = drng.getDistributedRandomNumber(); 

                if (test.ContainsKey(random)) 
                {
                    double prob = test[random];   // are these
                    prob = prob + 1.0 / testCount;// three lines
                    test[random] = prob;          // correctly translated?
                }
                else
                {
                    test.Add(random, 1.0 / testCount);// is this line correctly translated?
                }
            }

            foreach (var item in test.Keys)
            {
                Console.WriteLine($"{item}, {test[item]}");
            }

            Console.ReadLine();
        }
    }
}

I have several questions:

  1. Can you check if the marked-by-comment lines are correct or need explanation?
  2. Why doesn't getDistributedRandomNumber() check if the sum of the distribution 1 before proceeding to further calculations?
user366312
  • 16,949
  • 65
  • 235
  • 452

1 Answers1

3

The method

public void addNumber(int val, double dist)

Is not correctly translated, since you are missing the following lines:

if (this.distribution.get(value) != null) {
    distSum -= this.distribution.get(value);
}

Those lines should cover the case when you call the following (based on your example code):

DistributedRandomNumberGenerator drng = new DistributedRandomNumberGenerator();
drng.addNumber(1, 0.2d);
drng.addNumber(1, 0.5d);

So calling the method addNumber twice with the same first argument, the missing code part looks if the first argument is already present in the dictionary and if so it will remove the "old" value from the dictionary to insert the new value.

Your method should look like this:

public void addNumber(int val, double dist)
{
    if (distribution.TryGetValue(val, out var oldDist)) //get the old "dist" value, based on the "val"
    {
        distribution.Remove(val); //remove the old entry
        distSum -= oldDist; //substract "distSum" with the old "dist" value
    }

    distribution.Add(val, dist); //add the "val" with the current "dist" value to the dictionary
    distSum += dist; //add the current "dist" value to "distSum"
}

Now to your second method

public int getDistributedRandomNumber()

Instead of calling initializing a new instance of Random every time this method is called you should only initialize it once, so change the line

double rand = new Random().NextDouble();

to this

double rand = _random.NextDouble();

and initialize the field _random outside of a method inside the class declaration like this

public class DistributedRandomNumberGenerator
{
    private Dictionary<Int32, Double> distribution;
    private double distSum;
    private Random _random = new Random();        


    ... rest of your code
}

This will prevent new Random().NextDouble() from producing the same number over and over again if called in a loop. You can read about this problem here: Random number generator only generating one random number

As I side note, fields in c# are named with a prefix underscore. You should consider renaming distribution to _distribution, same applies for distSum.


Next:

double ratio = 1.0f / distSum;//why is ratio needed?

Ratio is need because the method tries its best to do its job with the information you have provided, imagine you only call this:

DistributedRandomNumberGenerator drng = new DistributedRandomNumberGenerator();
drng.addNumber(1, 0.2d);
int random = drng.getDistributedRandomNumber(); 

With those lines you told the class you want to have the number 1 in 20% of the cases, but what about the other 80%?

And that's where the ratio variable comes in place, it calculates a comparable value based on the sum of probabilities you have given. eg.

double ratio = 1.0f / distSum;

As with the latest example drng.addNumber(1, 0.2d); distSum will be 0.2, which translates to a probability of 20%.

double ratio = 1.0f / 0.2;

The ratio is 5.0, with a probability of 20% the ratio is 5 because 100% / 5 = 20%.

Now let's have a look at how the code reacts when the ratio is 5

double tempDist = 0;
foreach (Int32 i in distribution.Keys)
{
    tempDist += distribution[i];

    if (rand / ratio <= tempDist)
    {
        return i;
    }
}

rand will be to any given time a value that is greater than or equal to 0.0, and less than 1.0., that's how NextDouble works, so let's assume the following 0.254557522132321 as rand.

Now let's look what happens step by step

double tempDist = 0; //initialize with 0 
foreach (Int32 i in distribution.Keys) //step through the added probabilities
{
    tempDist += distribution[i]; //get the probabilities and add it to a temporary probability sum

    //as a reminder
    //rand = 0.254557522132321
    //ratio = 5
    //rand / ratio = 0,0509115044264642
    //tempDist = 0,2
    // if will result in true
    if (rand / ratio <= tempDist)
    {
        return i;
    }
}

If we didn't apply the ratio the if would be false, but that would be wrong, since we only have a single value inside our dictionary, so no matter what the rand value might be the if statement should return true and that's the natur of rand / ratio.

To "fix" the randomly generated number based on the sum of probabilities we added. The rand / ratio will only be usefull if you didn't provide probabilites that perfectly sum up to 1 = 100%.

eg. if your example would be this

DistributedRandomNumberGenerator drng = new DistributedRandomNumberGenerator();
drng.addNumber(1, 0.2d);
drng.addNumber(2, 0.3d);
drng.addNumber(3, 0.5d);

You can see that the provided probabilities equal to 1 => 0.2 + 0.3 + 0.5, in this case the line

if (rand / ratio <= tempDist)

Would look like this

if (rand / 1 <= tempDist)

Divding by 1 will never change the value and rand / 1 = rand, so the only use case for this devision are cases where you didn't provided a perfect 100% probability, could be either more or less.

As a side note, I would suggest changing your code to this

//call the dictionary distributions (notice the plural)
//dont use .Keys
//var distribution will be a KeyValuePair
foreach (var distribution in distributions)
{
    //access the .Value member of the KeyValuePair
    tempDist += distribution.Value;

    if (rand / ratio <= tempDist)
    {
        return i;
    }
}

Your test routine seems to be correctly translated.

user366312
  • 16,949
  • 65
  • 235
  • 452
Rand Random
  • 7,300
  • 10
  • 40
  • 88
  • Why `prob = prob + 1.0 / testCount;` needed? What does `1.0 / testCount` achieve? And can you please explain what is the logic behind this code, what is it named if I want to search about it? (like inverse cumulative distribution, etc.?) I couldn't get how is it serving its job.. – noobie Mar 21 '22 at 20:56
  • 1
    `1.0 / testCount` creates a percentage of each "item" of `testCount` - eg. if you have 25 apples and you want to know what the percentage 1 apple is of the total amount, you use this formula. How can 100% be 25 apples? You need to figure out by what number you need to inflate the other side to get 100 so. 25 * x = 100, to get x you can do 100 / 25 = x which gives you 4% - or as in the expample you aren't using percentages but there numeric representation so 4% = 0.04, so know you know that each apple represents 4% because 4% times 25 apples = 100% – Rand Random Mar 22 '22 at 09:23
  • 1
    `prob = prob + 1.0 / testCount;` now simply builds a sum of all those individual "items", so eg. you have a condition of if (apple = green) and 7 out of those 25 apples would be green, this could would give a end sum of 28%, because everytime the condition was true it added 4% to the sum – Rand Random Mar 22 '22 at 09:27
  • 1
    this has the sole purpose to verify if the propablity doesn't exceed the specified values. so in OPs code `drng.addNumber(2, 0.3d);` - checks if `2` wasn't distributed more often then 30% of the times – Rand Random Mar 22 '22 at 09:29
  • 1
    this seems correct: https://en.wikipedia.org/wiki/Probability_distribution – Rand Random Mar 22 '22 at 09:31
  • @noobie - see comments above – Rand Random Mar 22 '22 at 09:31