Why is the first piece of code much slower than the second?
public long Gen(int mod)
{
long a, b = 0x7fffffffffffffff - mod;
do
{
a = (long)(Gen() >> 1);
} while (a > b);
return a % mod;
}
public long Gen(int mod)
{
long a, b = 0x7fffffffffffffff - mod;
do
{
a = (long)(Gen() >> 1);
} while (a > b);
return a % 12345;
}
The gen function is a 64 bit unsigned PRNG (see below).
The problem is that the first piece of code is so much slower, that using a variable to calculate the modulus basically adds 3x the amount of time it takes to calculate just the random numbers! To add to the mystery, when you remove the loop, and calculate the modulus using a variable, the speed is similar to the second piece of code.
Something weird is going on around here, because you can't tell me that a modulus using a variable is several times as slow as this:
public ulong Gen()
{
counter = (counter + 1) & 3;
if (counter == 0)
{
state[0]++;
ulong x0 = state[0];
ulong x1 = state[1];
ulong x2 = state[2];
ulong x3 = state[3];
for (int i = 0; i < 2; i++)
{
x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));
x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));
x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
}
block[0] = x0;
block[1] = x1;
block[2] = x2;
block[3] = x3;
}
return block[counter];
}
Minimal reproducible version as requested:
using System;
using System.Diagnostics;
class Program
{
static void Main(string[] args)
{
Stopwatch sw = new Stopwatch();
Arx rng = new Arx();
long a = 0;
// constant = fast
sw.Start();
for (int i = 0; i < 10000000; i++)
{
a += rng.GenConstant(123);
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Console.WriteLine("{0:x16}", a);
sw.Reset();
// no loop = fast
sw.Start();
for (int i = 0; i < 10000000; i++)
{
a += rng.GenNoLoop(123);
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Console.WriteLine("{0:x16}", a);
sw.Reset();
// modulus variable = slow
sw.Start();
for (int i = 0; i < 10000000; i++)
{
a += rng.GenVariable(123);
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds);
Console.WriteLine("{0:x16}", a);
sw.Reset();
}
}
class Arx
{
static public ulong[] state = new ulong[4];
static public ulong[] outBlock = new ulong[4];
static int counter = -1;
public Arx(ulong seed = 0)
{
if (seed == 0)
state[1] = (ulong)DateTime.UtcNow.Ticks;
else
state[1] = seed;
}
public ulong Gen()
{
counter = (counter + 1) & 3;
if (counter == 0)
{
state[0]++;
ulong x0 = state[0];
ulong x1 = state[1];
ulong x2 = state[2];
ulong x3 = state[3];
for (int i = 0; i < 2; i++)
{
x0 += x1; x1 ^= ((x0 << 32) | (x0 >> (64 - 32)));
x1 += x0; x0 ^= ((x1 << 32) | (x1 >> (64 - 32)));
x2 += x3; x3 ^= ((x2 << 32) | (x2 >> (64 - 32)));
x3 += x2; x2 ^= ((x3 << 32) | (x3 >> (64 - 32)));
x0 += x2; x2 ^= ((x0 << 27) | (x0 >> (64 - 27)));
x2 += x0; x0 ^= ((x2 << 27) | (x2 >> (64 - 27)));
x1 += x3; x3 ^= ((x1 << 27) | (x1 >> (64 - 27)));
x3 += x1; x1 ^= ((x3 << 27) | (x3 >> (64 - 27)));
x0 += x3; x3 ^= ((x0 << 11) | (x0 >> (64 - 11)));
x3 += x0; x0 ^= ((x3 << 11) | (x3 >> (64 - 11)));
x1 += x2; x2 ^= ((x1 << 11) | (x1 >> (64 - 11)));
x2 += x1; x1 ^= ((x2 << 11) | (x2 >> (64 - 11)));
}
outBlock[0] = x0;
outBlock[1] = x1;
outBlock[2] = x2;
outBlock[3] = x3;
}
return outBlock[counter];
}
public long GenConstant(int mod)
{
long a, b = 0x7fffffffffffffff - mod;
do
{
a = (long)(Gen() >> 1);
} while (a > b);
return a % 12345;
}
public long GenVariable(int mod)
{
long a, b = 0x7fffffffffffffff - mod;
do
{
a = (long)(Gen() >> 1);
} while (a > b);
return a % mod;
}
public long GenNoLoop(int mod)
{
long a = (long)(Gen() >> 1);
return a % mod;
}
}