I'm working in a project where I need to calculate the square root of a 16-bit number. I'm using a NXP micro-controller that doesn't have support for float point operation. Looking online I found the following approach Fixed-Point Square Root by Ken Turkowsky from Apple. I also found the same algorithm with some modifications around the WEB including here: Looking for an efficient integer square root algorithm for ARM Thumb2 I'm copying the code here in order to make it available in case of a broken link:
void put_space(unsigned int number)
{
int i;
char spaces = 12;
char buffer[15];
char number_of_chars = sprintf(buffer,"%u",number);
for(i=0;i<spaces-number_of_chars;i++)
{
printf(" ");
}
}
void print_binary(unsigned int number)
{
int i;
unsigned char bit;
for(i=0;i<32;i++)
{
if(i%8 == 0 && i!=0)
{
printf(" ");
}
bit = (number & (2147483648 >> i)) == (2147483648 >> i);
printf("%u",bit);
}
}
void print_number(unsigned int number)
{
print_binary(number);
printf(" ");
printf("(%u)",number);
put_space(number);
}
typedef signed int TFract; /* 2 integer bits, 30 fractional bits */
TFract FFracSqrt(TFract x)
{
register unsigned int root, remHi, remLo, testDiv, count;
root = 0; /* Clear root */ printf("root: "); print_number(root);
remHi = 0; /* Clear high part of partial remainder */ printf("remHi: "); print_number(remHi);
remLo = x; /* Get argument into low part of partial remainder */ printf("remLo: "); print_number(remLo);
count = 30; /* Load loop counter */
printf("\n\n");
do
{
remHi = (remHi<<2) | (remLo>>30); printf("remHi: "); print_number(remHi);
remLo <<= 2; /* get 2 bits of arg */ printf("remLo: "); print_number(remLo);
root <<= 1; /* Get ready for the next bit in the root */ printf("root: "); print_number(root);
testDiv = (root << 1) + 1; /* Test radical */ printf("testDiv: "); print_number(testDiv);
if (remHi >= testDiv)
{
remHi -= testDiv; printf("remHi: "); print_number(remHi);
root++; printf("root: "); print_number(root);
}
printf("\n");
} while (count-- != 0);
printf("\n\nResult: %u\n\n",root);
return(root);
}
I can understand what is going on in the algorithm including the shifts and the OR operation. However, I can't comprehend the mathematics behind this code. FFracSqrt receives a "x" integer number as parameter and returns floor(sqrt(x)*32768). How does that happen? Please, help me and other people to understand it because I couldn't find anything about the theory so far. Sincerely,
I'm adding here the output when input is 2:
Before do-while loop
root: 00000000 00000000 00000000 00000000 (0)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00000000 00000010 (2)
Into do-while loop
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00000000 00001000 (8)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00000000 00100000 (32)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00000000 10000000 (128)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00000010 00000000 (512)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00001000 00000000 (2048)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 00100000 00000000 (8192)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000000 10000000 00000000 (32768)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00000010 00000000 00000000 (131072)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00001000 00000000 00000000 (524288)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 00100000 00000000 00000000 (2097152)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000000 10000000 00000000 00000000 (8388608)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00000010 00000000 00000000 00000000 (33554432)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00001000 00000000 00000000 00000000 (134217728)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 00100000 00000000 00000000 00000000 (536870912)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000000 (0)
remLo: 10000000 00000000 00000000 00000000 (2147483648)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000010 (2)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00000000 (0)
testDiv: 00000000 00000000 00000000 00000001 (1)
If condition TRUE
remHi: 00000000 00000000 00000000 00000001 (1)
root: 00000000 00000000 00000000 00000001 (1)
remHi: 00000000 00000000 00000000 00000100 (4)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00000010 (2)
testDiv: 00000000 00000000 00000000 00000101 (5)
remHi: 00000000 00000000 00000000 00010000 (16)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00000100 (4)
testDiv: 00000000 00000000 00000000 00001001 (9)
If condition TRUE
remHi: 00000000 00000000 00000000 00000111 (7)
root: 00000000 00000000 00000000 00000101 (5)
remHi: 00000000 00000000 00000000 00011100 (28)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00001010 (10)
testDiv: 00000000 00000000 00000000 00010101 (21)
If condition TRUE
remHi: 00000000 00000000 00000000 00000111 (7)
root: 00000000 00000000 00000000 00001011 (11)
remHi: 00000000 00000000 00000000 00011100 (28)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00010110 (22)
testDiv: 00000000 00000000 00000000 00101101 (45)
remHi: 00000000 00000000 00000000 01110000 (112)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 00101100 (44)
testDiv: 00000000 00000000 00000000 01011001 (89)
If condition TRUE
remHi: 00000000 00000000 00000000 00010111 (23)
root: 00000000 00000000 00000000 00101101 (45)
remHi: 00000000 00000000 00000000 01011100 (92)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 01011010 (90)
testDiv: 00000000 00000000 00000000 10110101 (181)
remHi: 00000000 00000000 00000001 01110000 (368)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000000 10110100 (180)
testDiv: 00000000 00000000 00000001 01101001 (361)
If condition TRUE
remHi: 00000000 00000000 00000000 00000111 (7)
root: 00000000 00000000 00000000 10110101 (181)
remHi: 00000000 00000000 00000000 00011100 (28)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000001 01101010 (362)
testDiv: 00000000 00000000 00000010 11010101 (725)
remHi: 00000000 00000000 00000000 01110000 (112)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000010 11010100 (724)
testDiv: 00000000 00000000 00000101 10101001 (1449)
remHi: 00000000 00000000 00000001 11000000 (448)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00000101 10101000 (1448)
testDiv: 00000000 00000000 00001011 01010001 (2897)
remHi: 00000000 00000000 00000111 00000000 (1792)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00001011 01010000 (2896)
testDiv: 00000000 00000000 00010110 10100001 (5793)
remHi: 00000000 00000000 00011100 00000000 (7168)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00010110 10100000 (5792)
testDiv: 00000000 00000000 00101101 01000001 (11585)
remHi: 00000000 00000000 01110000 00000000 (28672)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 00101101 01000000 (11584)
testDiv: 00000000 00000000 01011010 10000001 (23169)
If condition TRUE
remHi: 00000000 00000000 00010101 01111111 (5503)
root: 00000000 00000000 00101101 01000001 (11585)
remHi: 00000000 00000000 01010101 11111100 (22012)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 01011010 10000010 (23170)
testDiv: 00000000 00000000 10110101 00000101 (46341)
remHi: 00000000 00000001 01010111 11110000 (88048)
remLo: 00000000 00000000 00000000 00000000 (0)
root: 00000000 00000000 10110101 00000100 (46340)
testDiv: 00000000 00000001 01101010 00001001 (92681)
Result: 46340