1

I have implemented a mandelbrot-explorer in OpenGL. For deeper zooms I'm trying to implement double-double-arithmetic. I have working code for that in C++ but - if I haven't overlooked something - the exact same code only produces double-precision in GLSL.

C++-code for multiplication:

igdd quickTwoSum(double a, double b)
    {
        igdd Temp;
        Temp.hi = a + b;
        Temp.lo = b - (Temp.hi - a);
        return Temp;
    }

igdd Split64(double d)
    {
        const double SPLITTER = (1 << 29) + 1;
        double t = d * SPLITTER;
        igdd Temp;
        Temp.hi = t - (t - d);
        Temp.lo = d - Temp.hi;
        return Temp;
    }

igdd twoProd(double a, double b)
    {
        igdd p;
        p.hi = a * b;
        igdd aS = Split64(a);
        igdd bS = Split64(b);
        p.lo = ((aS.hi * bS.hi - p.hi) + aS.hi * bS.lo + aS.lo * bS.hi + aS.lo * bS.lo);
        return p;
    }

igdd operator*(igdd a)
    {
        igdd Temp;
        Temp = twoProd(a.hi, this->hi);
        Temp.lo += a.hi * this->lo + a.lo * this->hi;
        Temp = quickTwoSum(Temp.hi, Temp.lo);
        return Temp;
    }

Open-GL-code:

dvec2 Split64(double d)
{
    const double SPLITTER = (1 << 29) + 1;
    double t = d * SPLITTER;
    dvec2 result;
    result.x = t - (t - d);
    result.y = d - result.x;

    return result;
}

dvec2 quickTwoSum(double a, double b)
{
    dvec2 result;
    result.x = a + b;
    result.y = b - (result.x - a);
    return result;
}

dvec2 twoProd(double a, double b)
{
    dvec2 p;
    p.x = a * b;
    dvec2 aS = Split64(a);
    dvec2 bS = Split64(b);
    p.y = (aS.x * bS.x - p.x) + aS.x * bS.y + aS.y * bS.x + aS.y * bS.y;
    return p;
}

dvec2 df128_mul(dvec2 a, dvec2 b)
{
    dvec2 p;

    p = twoProd(a.x, b.x);
    p.y += a.x * b.y + a.y * b.x;
    p = quickTwoSum(p.x, p.y);
    return p;
}

For testing purposes I did this in both C++

igdd Testdd; // The double-double class in C++
Testdd.lo = 0.0000000000000000000000001;
Testdd.hi = 1.0;
std::cout.precision(17);
for (int i = 0; i < 95; i++)
{
    Testdd = Testdd * Testdd;
    std::cout << std::fixed << Testdd.hi << std::endl;
}

... and OpenGL

dvec2 XR;
XR.y = 0.0000000000000000000000001;
XR.x = 1.0;
for (i = 0; i < 95; i++) // I tried different values for 95 here
{
    XR = df128_mul(XR, XR);
}

If I keep squaring the number, in C++ at some point you can see it getting bigger (after initially it's too many zeros). GLSL always produces one (or zero - I did number-1 before output).

So is there something weird going on with doubles (or maybe dvec2) in GLSL or have I overlooked something? Also: When calculating the mandelbrot-set with double-doubles, the CPU is REALLY slow. The GPU produces exactly the right pictures (until I zoom in to about 10^-14) and does so suspiciously fast...

Rabbid76
  • 202,892
  • 27
  • 131
  • 174
Paul Aner
  • 361
  • 1
  • 8
  • 1
    OpenGL is not a language. You mean GLSL? – Thomas Jan 01 '22 at 10:46
  • Yes, of course. – Paul Aner Jan 01 '22 at 10:59
  • Maybe one uses the equivalent of `-ffast-math` by default and not the other? – Marc Glisse Jan 01 '22 at 11:18
  • By the way, on platforms that have a native FMA, doing a split for twoProd is a waste of time. – Marc Glisse Jan 01 '22 at 11:20
  • I didn't do anything -ffast-math-y so everything here is standard. I know about FMA, but for now I just want the code to work. Optimizing comes later... ;-) – Paul Aner Jan 01 '22 at 11:24
  • 3
    note that while `C++` use the suffix `f` to denote single precision literals, and default with no suffix is double precison, GLSL uses float as default precision, and you have to use `lf` suffix for doubles. – derhass Jan 01 '22 at 13:16
  • 1
    @derhass OK, I didn't know that. But the only two lines in my code where this might make a difference are XR.y = 0.0000000000000000000000001; and XR.x = 1.0; I put "lf" behind those, but it didn't make a difference... – Paul Aner Jan 01 '22 at 13:39
  • 1
    "Maybe one uses the equivalent of `-ffast-math` by default" That's correct, GLSL defaults to doing all those optimisations, whereas C and C++ do not do them by default. But as seen in @PaulAner's own answer to the question, there are ways to disable that. – Andrea Jan 02 '22 at 19:04

1 Answers1

2

I finally found the problem. The compiler optimized away precision that was needed. I found another code for multipliying that produced the exact same result (double-precision - not "double-double"). For some reason that code seemed to work for the guy but not for me.

What solved the problem was using the "precise" keyword. I plastered everything with it and now it works - although of course much slower:

precise dvec2 quickTwoSum(precise double a, precise double b)
{
    precise dvec2 result;
    result.x = a + b;
    result.y = b - (result.x - a);
    return result;
}

precise dvec2 twoProd(precise double a, precise double b)
{
    precise dvec2 p;
    p.x = a * b;
    precise dvec2 aS = Split64(a);
    precise dvec2 bS = Split64(b);
    p.y = (aS.x * bS.x - p.x) + aS.x * bS.y + aS.y * bS.x + aS.y * bS.y;
    return p;
}

precise dvec2 df128_mul(precise dvec2 a, precise dvec2 b)
{
    precise dvec2 p;
    p = twoProd(a.x, b.x);
    p.y += a.x * b.y + a.y * b.x;
    p = quickTwoSum(p.x, p.y);

    return p;
}
Paul Aner
  • 361
  • 1
  • 8