17

Here is a simple way to calculate an integer square root:

int isqrt(int num)
{
    int root=0;
    int b = 0x8000;
    int a=0, c=0;

    while (b) {
        c = a|b;

        if (c*c <= num)
            a |= b;   

        b >>= 1;
    }       
}

Ingeniously (thanks Wikipedia), this can be optimised like this:

int sqrt(short num)
{
    int op = num;
    int res = 0;
    int one = 1 << 30;

    while (one > op)
        one >>= 2;

    while (one != 0) {
        if (op >= res + one) {
            op -= res + one;
            res = (res >> 1) + one;
        }
        else
          res >>= 1;
        one >>= 2;
    }
    return res;
}

My question: Can a similarly optimised algorithm be written for an integer cube root? (This is to be run on a small microcontroller which prefers not to do multiplications)

dav_i
  • 27,509
  • 17
  • 104
  • 136
Rocketmagnet
  • 5,656
  • 8
  • 36
  • 47
  • 1
    Not that I know of, but you can generate the third powers of successive integers by adding 7, 19, 37, 61 etc and you can get those numbers by adding 12, 18, 24, 30, 36 etc. It's not especially smart or fast but considering the integer cube root of 2^32 is still only 1625, it shouldn't take that many iterations (all of which consist of a couple of adds and a compare, no mults). edit: so it turns out there is a way. good to know! – harold Jul 10 '11 at 13:28
  • Yes, the algorithm can be extended to cube-roots, even without multiplications. See this code: http://www.hackersdelight.org/HDcode/icbrt.c.txt And consider to buy the book Hackers Delight where he code comes from. If you have to solve such problems more often than once a year you should definitely read it! – Nils Pipenbrinck Jul 10 '11 at 13:28

2 Answers2

10

According to this SO question and to the answer marked, from the Hacker's Delight book you can find this implementation:

int icbrt2(unsigned x) {
   int s;
   unsigned y, b, y2;

   y2 = 0;
   y = 0;
   for (s = 30; s >= 0; s = s - 3) {
      y2 = 4*y2;
      y = 2*y;
      b = (3*(y2 + y) + 1) << s;
      if (x >= b) {
         x = x - b;
         y2 = y2 + 2*y + 1;
         y = y + 1;
      }
   }
   return y;
}
Community
  • 1
  • 1
Jack
  • 131,802
  • 30
  • 241
  • 343
3

This is an (extreme) C# optimized version of the Hacker's Delight code, as mentioned by others.

For reference (on my pc): Math.Sqrt takes about 35 ns, cbrt < 15 ns.

Multiplications by small numbers are used, but it's easy to replace them with shifts and adds. For example the largest multipication (last line): "12 * z" ==> "(z << 3) + (z << 2)"

It's difficult to judge whether the size of the code is acceptable for a small microcontroller.

First step: A binary search, the "if" statements, large values ( >= 1u << 24 ) are found relatively faster, small values ( < 64 ) are handled during the search.

Second step: A jump into the unrolled loop, the "labels".

private static uint cbrt(uint x)
{
    uint y = 2, z = 4, b = 0;
    if (x < 1u << 24)
        if (x < 1u << 12)
            if (x < 1u << 06)
                if (x < 1u << 03)
                    return x == 0u ? 0u : 1u;
                else
                    return x < 27u ? 2u : 3u;
            else
                if (x < 1u << 09) goto L8; else goto L7;
        else
            if (x < 1u << 18)
                if (x < 1u << 15) goto L6; else goto L5;
            else
                if (x < 1u << 21) goto L4; else goto L3;
    else
        if (x >= 1u << 30) goto L0;
        else
            if (x < 1u << 27) goto L2; else goto L1;

L0: x -= 1u << 30; if (x >= 19u << 27)
    { x -= 19u << 27; z = 9; y = 3; } goto M0;
L1: x -= 1u << 27; if (x >= 19u << 24)
    { x -= 19u << 24; z = 9; y = 3; } goto M1;
L2: x -= 1u << 24; if (x >= 19u << 21)
    { x -= 19u << 21; z = 9; y = 3; } goto M2;
L3: x -= 1u << 21; if (x >= 19u << 18)
    { x -= 19u << 18; z = 9; y = 3; } goto M3;
L4: x -= 1u << 18; if (x >= 19u << 15)
    { x -= 19u << 15; z = 9; y = 3; } goto M4;
L5: x -= 1u << 15; if (x >= 19u << 12)
    { x -= 19u << 12; z = 9; y = 3; } goto M5;
L6: x -= 1u << 12; if (x >= 19u << 09)
    { x -= 19u << 09; z = 9; y = 3; } goto M6;
L7: x -= 1u << 09; if (x >= 19u << 06)
    { x -= 19u << 06; z = 9; y = 3; } goto M7;
L8: x -= 1u << 06; if (x >= 19u << 03)
    { x -= 19u << 03; z = 9; y = 3; } goto M8;

M0: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 24; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M1: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 21; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M2: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 18;
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M3: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 15;
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M4: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 12; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M5: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 09; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M6: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 06; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M7: y *= 2; z *= 4; b = 3 * y + 3 * z + 1 << 03; 
    if (x >= b) { x -= b; z += 2 * y + 1; y += 1; }
M8: y *= 2; return x <= 3 * y + 12 * z ? y : y + 1;
}
P_P
  • 787
  • 7
  • 10