The following program can be used to determine whether tininess is detected before or after rounding.
#include <fenv.h>
#include <float.h>
/* The bit-pattern construction used by this program is only valid for a
   radix-2 floating-point format.
*/
_Static_assert(FLT_RADIX == 2, "This program expects binary floating-point.");

/* The floating-point type under test. */
typedef float Float;

/* Parameters of Float, taken from <float.h>. If Float is changed, replace
   the FLT_ prefix in both constants with the prefix for the new type
   (for example, DBL_ for double).
*/
enum {
    /* Number of bits in the significand. */
    Precision = FLT_MANT_DIG,

    /* Exponent of the smallest normal number. C defines FLT_MIN_EXP for
       significands in [1/2, 1); subtracting 1 converts it to the exponent
       for significands in [1, 2).
    */
    MinimumExponent = FLT_MIN_EXP - 1
};
// Use the following if your compiler supports it. Not all do.
//#pragma STDC FENV_ACCESS ON
/* Report true iff multiplying a by b in type Float raises the underflow
   floating-point exception.

   a, b: the factors.
   Returns 1 if FE_UNDERFLOW is set after the multiplication, 0 otherwise.

   All floating-point exception flags are cleared as a side effect.

   The factors are routed through volatile objects. Unless
   "#pragma STDC FENV_ACCESS ON" is in effect (and not every compiler
   honors it), the compiler is allowed to evaluate a*b at translation time
   when the arguments are known after inlining, or to move the
   multiplication before feclearexcept. Either would leave the flag unset
   even though the product underflows. Reading the factors from volatile
   objects after the flags are cleared, and storing the product to a
   volatile object before the flags are tested, keeps the multiplication
   at run time and between the two calls.
*/
static _Bool ProductUnderflows(Float a, Float b)
{
    volatile Float x = a;
    volatile Float y = b;
    volatile Float product;

    feclearexcept(FE_ALL_EXCEPT);
    product = x * y;

    // fetestexcept returns a bit mask; reduce it to 0 or 1 explicitly.
    return fetestexcept(FE_UNDERFLOW) != 0;
}
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
int main(void)
{
if (fesetround(FE_TONEAREST) != 0)
{
fprintf(stderr, "Error, cannot set rounding mode to nearest.\n");
exit(EXIT_FAILURE);
}
/* Find the least positive integer that does not divide the number of bits
in a significand (also called p or the precision of the type).
*/
int q = 1;
while (Precision % q == 0)
++q;
// Set a to a string of q bits after the radix point.
Float a = 1 - ldexp(1, -q);
/* Consider 1/a. This necessarily rounds down and sets b to a repeating
pattern of a 1 bit followed by q-1 0 bits.
To see that it rounds down, consider the binary representation of the
mathematical quotient 1/a. It is a repeating pattern of a 1 bit
followed by q-1 0 bits. So the 1 bits land at offsets from the first 1
bit of q, 2q, 3q, and so on. So they only land at multiples of q. And
we know p is not a multiple of q, so there is no 1 bit at the position
p bits beyond the leading bit. In other words, the first bit that is
does not fit in the p-bit significand is 0. So the residue being
discarded during rounding is less than 1/2 ULP, so round-to-nearest
rounds down.
We set b to 1/a scaled so that a*b is just below the normal range.
Then the mathematical product of a and b has a significand of
ceil(p/q)*q 1 bits, which is greater than p, so the product must be
rounded to fit in a signifcand. In round-to-nearest-ties-to-even mode,
it will round upward, so the floating-point product of a and b will be
the smallest normal number. Therefore, there is an underflow if
tininess is detected before rounding but not if it is detected after
rounding.
*/
Float b = ldexp(1/a, MinimumExponent);
printf("a = %a.\n", a);
printf("b = %a.\n", b);
/* Test that we hit the boundary correctly: (a/2)*b underflows but
(2*a)*b does not. Also test that underflow reporting works.
*/
if (!ProductUnderflows(a/2, b))
{
fprintf(stderr,
"Internal error, %a * %a -> %a is expected to underflow but did not.\n",
a/2, b, (a/2)*b);
exit(EXIT_FAILURE);
}
if (ProductUnderflows(2*a, b))
{
fprintf(stderr,
"Internal error, %a * %a -> %a is expected not to underflow but did.\n",
2*a, b, (2*a)*b);
exit(EXIT_FAILURE);
}
// Test whether tininess is detected before or after rounding.
printf("Tininess is detected %s rounding.\n",
ProductUnderflows(a, b) ? "before" : "after");
}