How do you express a large integer?

Question

in C

#include <stdio.h>
#include <string.h>
#include <math.h>

int main() {
    unsigned long long result = 20170401000ULL * 20170401000;
    printf("%llu\n", result);
    return 0;
}

Output

1016706879190864448

Expected

406845076500801000000

[How to handle big integers in C](https://stackoverflow.com/questions/8709602/how-to-handle-big-integers-in-c) — Retired Ninja, Apr 22 '23 at 12:37
It depends on what you mean by that, @Kanz. You can write *your own* code for bignum calculations, especially if you need only one operation (multiplication). You don't need to engage a *third-party* library. And it may be that some implementations provide a built-in type large enough to hold that particular product. But any given C implementation has a limit to the magnitude of the values that its built-in types can accommodate. — John Bollinger, Apr 22 '23 at 12:43
If you are working on an online code challenge that seems to involve large numbers, it is likely one you are intended to solve by figuring out a way to do it without large numbers. — Eric Postpischil, Apr 22 '23 at 12:47
Thank you, everyone. I'll install the GMP library and try again — Kanz, Apr 22 '23 at 13:00
Side note: `math.h` has nothing to do with any of this. It primarily provides declarations for a bunch of floating-point functions such as `sin()` and `log()`. No special header is required for built-in arithmetic operations. — John Bollinger, Apr 22 '23 at 13:01
`unsigned` 64 bit integers go up to `18,446,744,073,709,551,615`, but I'm afraid this is still short to handle your numbers. — Luis Colorado, Apr 26 '23 at 06:56

chqrlie · Answer 1 · 2023-04-22T21:36:36.283

To handle numbers larger than the standard type unsigned long long, you can use different solutions:

you can use a bignum library such as GNU's gmp.
you can use a larger type if available on your system, such as __uint128_t.
you can slice the operands into chunks for which the standard types can handle the results without overflow or wrap around.

Here is an example of (2):

#include <stdio.h>

int main() {
    unsigned long long a = 20170401000ULL;
    unsigned long long b = 20170401000ULL;
    unsigned long long result[3];
    __uint128_t m = (__uint128_t)a * (__uint128_t)b;

    // handle all 128-bit values, up to 340282366920938463463374607431768211455
    result[0] = m % 1000000000000000000;
    result[1] = m / 1000000000000000000 % 1000000000000000000;
    result[2] = m / 1000000000000000000 / 1000000000000000000;

    int i;
    for (i = 2; i > 0 && result[i] == 0; i--)
        continue;
    printf("%llu", result[i]);
    while (i-- > 0)
        printf("%18llu", result[i]);
    printf("\n");
    return 0;
}

Here is an example of (3) with a smaller range:

#include <stdio.h>

int main() {
    unsigned long long a = 20170401000ULL;
    unsigned long long b = 20170401000ULL;
    unsigned long long result[3];

    // handle results up to 18446744065119617025999999999999999999
    // slice the operand into low and high parts
    unsigned long long a_lo = a % 1000000000;
    unsigned long long a_hi = a / 1000000000;
    unsigned long long b_lo = b % 1000000000;
    unsigned long long b_hi = b / 1000000000;
    // compute the partial products
    result[0] = a_lo * b_lo;
    result[1] = a_hi * b_lo + a_lo * b_hi;
    result[2] = a_hi * b_hi;
    // normalize result (propagate carry)
    result[1] += result[0] / 1000000000;
    result[0] %= 1000000000;
    result[2] += result[1] / 1000000000;
    result[1] %= 1000000000;

    int i;
    // ignore leading zeroes
    for (i = 2; i > 0 && result[i] == 0; i--)
        continue;
    // output the leading group of digits
    printf("%llu", result[i]);
    // output the trailing groups of 9 digits
    while (i-- > 0) {
        printf("%09llu", result[i]);
    }
    printf("\n");
    return 0;
}

And a final approach combining both a binary computation and base 10 conversion for the full 128-bit range:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

void mul64x64(uint32_t dest[4], uint64_t a, uint64_t b) {
    // using 32x32 -> 64 multiplications
    uint64_t low = (a & 0xFFFFFFFF) * (b & 0xFFFFFFFF);
    uint64_t mid1 = (a >> 32) * (b & 0xFFFFFFFF);
    uint64_t mid2 = (b >> 32) * (a & 0xFFFFFFFF);
    uint64_t high = (a >> 32) * (b >> 32);
    dest[0] = (uint32_t)low;
    mid1 += low >> 32;
    high += mid1 >> 32;
    mid2 += mid1 & 0xFFFFFFFF;
    dest[1] = (uint32_t)mid2;
    high += mid2 >> 32;
    dest[2] = (uint32_t)high;
    dest[3] = high >> 32;
}

uint32_t div_10p9(uint32_t dest[4]) {
    uint64_t num = 0;
    for (int i = 4; i-- > 0;) {
        num = (num << 32) + dest[i];
        dest[i] = num / 1000000000;
        num %= 1000000000;
    }
    return num;
}

int main() {
    uint32_t result[4];     // 128-bit multiplication result
    uint32_t base10[5];     // conversion to base10_9: pow(10,50) > pow(2,128)
    int i;

    mul64x64(result, 20170401000ULL, 20170401000ULL);
    for (i = 0; i < 5; i++) {
        base10[i] = div_10p9(result);
    }

    // ignore leading zeroes
    for (i = 4; i > 0 && base10[i] == 0; i--)
        continue;
    // output the leading group of digits
    printf("%"PRIu32, base10[i]);
    // output the trailing groups of 9 digits
    while (i-- > 0) {
        printf("%09"PRIu32, base10[i]);
    }
    printf("\n");
    return 0;
}

Output:

406845076500801000000

`#ifdef __SIZEOF_INT128__` for checking if compiler has __uint128_t or not. — Darth-CodeX, Apr 22 '23 at 15:16
what's the name of the algorithm used in third example? where can I find its proof? — Cinverse, Apr 22 '23 at 19:08
@Cinverse (continue...) Is this "Karatsuba multiplication" algorthim? Source: https://en.wikipedia.org/wiki/Karatsuba_algorithm — Cinverse, Apr 22 '23 at 19:35
@Cinverse: no, it is a much simpler algorithm, used in middle school to multiply 2-digit numbers. This variant uses larger *digits* with 1 billion values each. Think of it as `(a*10**9 + b) * (c*10**9 + d)` — chqrlie, Apr 22 '23 at 19:52
@Cinverse: I amended the answer with more explicit intermediary steps and explanations — chqrlie, Apr 22 '23 at 20:01
@chqrlie Okay, I never learnt this before, only knew Long multiplication, thanks. Also, your method is technically "Karatsuba Algorithm", isn't it? If you see its wiki page, you can relate your algorithm to its equation: `x*y = a*B^2 + b*B^1 + c*B^0`, where `B` is the base (in third example, B = 10^9), `a = xh*yh`, `b = xl*yh + xh*yl` and `c = xl*bl`. — Cinverse, Apr 22 '23 at 20:12
@Cinverse: not really: my algorithm is the classic long multiplication, using 4 multiplications, whereas Karatsuba uses only 3 multiplications: `a=xh*yh`, `c=xl*yl` and `b=(xh+xl)*(yh+yl)-a-c` — chqrlie, Apr 22 '23 at 20:26

score 1 · Answer 2 · answered Apr 22 '23 at 12:42

you need to store even larger values, you can use external libraries such as GMP (GNU Multiple Precision Arithmetic Library), which provides data types like mpz_t and mpq_t that can handle very large numbers with arbitrary precision. These data types can store integers and fractions of any size, limited only by available memory. I hope this helped you :)

score 1 · Answer 3 · edited Apr 22 '23 at 20:08

As the base 10ⁿ version hase already been given, the base 2ⁿ version is slightly more involved:

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>

/*
    Unsigned arguments to make it more versatile.
    It is easy to get from signed integers to unsigend ones (just safe
    the sign somewhere if you need it later) but not so much vice versa.
 */
static void mul64x64(const uint64_t a, const uint64_t b, uint64_t *high, uint64_t *low)
{
   uint32_t ah, al, bh, bl;
   uint64_t plh, phh, pll, phl;
   uint64_t carry = 0;

   ah = (a >> 32ull) & 0xFFFFFFFF;
   al = a & 0xFFFFFFFF;

   bh = (b >> 32ull) & 0xFFFFFFFF;
   bl = b & 0xFFFFFFFF;

   plh = (uint64_t)al * bh;
   phh = (uint64_t)ah * bh;
   pll = (uint64_t)al * bl;
   phl = (uint64_t)ah * bl;

   /*
      |  high   |   low   |
           | al * bh |
      | ah * bh | al * bl |
           | ah * bl |
    */

   *low = (pll) + ((plh & 0xFFFFFFFF)<<32ull) + ((phl & 0xFFFFFFFF) << 32ull);
   carry = ((pll >> 32ull) + (plh & 0xFFFFFFFF) + (phl & 0xFFFFFFFF)) >> 32ull;
   *high = phh + (phl >> 32ull) + (plh >> 32ull) + carry;
}

/* Division of 128 bit by 32 bits */
static void div64x64by32(const int64_t high, const uint64_t low, const uint32_t denominator,
                         int64_t *quotient_high, uint64_t *quotient_low, uint64_t *remainder)
{
   uint32_t a1, a2, a3, a4, q1, q2, q3, q4;
   uint64_t w, t, b;

   /*
      |    high    |     low    |
      |  a1  |  a2 |  a3  | a4  |
   */

   a1 = ((uint64_t)high) >> 32ull;
   a2 = ((uint64_t)high) & 0xFFFFFFFF;
   a3 = low >> 32ull;
   a4 = low & 0xFFFFFFFF;

   b = (uint64_t) denominator;
   w = 0ull;

   /*
       This is explained in detail in Tom St Denis "Multi-Precision Math"
       (ask google for "tommath.pdf") and implemented in libtommath:
       https://github.com/libtom/libtommath
       That is also the library to go if you cannot use GMP or similar
       bigint-libraries for legal (license) reasons.
    */
   /* Loop unrolled because we have individual digits */
   w = (w << 32ull) + a1;
   if (w >= b) {
      t = w / b;
      w = w % b;
   } else {
      t = 0;
   }
   q1 = (uint32_t)t;

   w = (w << 32ull) + a2;
   if (w >= b) {
      t = w / b;
      w = w % b;
   } else {
      t = 0;
   }
   q2 = (uint32_t)t;

   w = (w << 32ull) + a3;
   if (w >= b) {
      t = w / b;
      w = w % b;
   } else {
      t = 0;
   }
   q3 = (uint32_t)t;

   w = (w << 32ull) + a4;
   if (w >= b) {
      t = w / b;
      w = w % b;
   } else {
      t = 0;
   }
   q4 = (uint32_t)t;

   /* Gather the results */
   *quotient_high = (int64_t)q1 << 32ull;
   *quotient_high += (int64_t)q2;
   *quotient_low = (uint64_t)q3 << 32ull;
   *quotient_low += (uint64_t)q4;
   /* The remainder fits in an uint32_t but I didn't want to complicate it further */
   *remainder = w;
}

/*
   Reverse the given string in-place.

   Fiddling that apart is an exercise for the young student.
   Why it is a bad idea to do it that way is for the commenters
   at stackoverflow.
 */
static void strrev(char *str)
{
   char *end = str + strlen(str) - 1;
   while (str < end) {
      *str ^= *end;
      *end ^= *str;
      *str ^= *end;
      str++;
      end--;
   }
}

/* Assuming ASCII */
static char *print_high_low_64(const int64_t high, const uint64_t low)
{
   int sign;
   char *output, *str, c;
   int64_t h;
   uint64_t l, remainder;
   uint32_t base;

   /* TODO: checks&balances! And not only here! */

   sign = (high < 0) ? -1 : 1;
   h = (high < 0) ? -high : high;
   l = low;

   /* 64 bits in decimal are 20 digits plus room for the sign and EOS */
   output = malloc(2 * 20 + 1 + 1);
   if (output == NULL) {
      return NULL;
   }
   str = output;

   /*
      Yes, you can use other bases, too, but that gets more complicated,
      you need a small table. Either with all of the characters as they
      are or with a bunch of small constants to add to reach the individual
      character groups in ASCII.
      Hint: use a character table, it's much easier.
    */
   base = 10ul;

   /* Get the bits necessary to gather the digits one by one */
   for (;;) {
      div64x64by32(h, l, base, &h, &l, &remainder);
      /*
         ASCII has "0" at position 0x30 and the C standard guarantees
         all digits to be in consecutive order.
         EBCDIC has "0" at position 0xF0 and would need an uint8_t type.
       */
      c = (char)(remainder + 0x30);
      *str = c;
      str++;
      if ((h == 0ll) && (l == 0ull)) {
         break;
      }
   }
   /* Put sign in last */
   if (sign < 0) {
      *str = '-';
      str++;
   }
   /* Don't forget EOS! */
   *str = '\0';

   /* String is in reverse order. Reverse that. */
   strrev(output);

   return output;
}

int main(int argc, char **argv)
{
   int64_t a, b;
   uint64_t high, low;
   int sign = 1;
   char *s;

   if (argc == 3) {
      /* TODO: catch errors (see manpage, there is a full example at the end) */
      a = strtoll(argv[1], NULL, 10);
      b = strtoll(argv[2], NULL, 10);
   } else {
      fprintf(stderr,"Usage: %s integer integer\n",argv[0]);
      exit(EXIT_FAILURE);
   }

   printf("Input: %"PRId64" * %"PRId64"\n", a, b);

   /* Yes, that can be done a bit simpler, give it a try. */
   if (a < 0) {
      sign = -sign;
      a = -a;
   }

   if (b < 0) {
      sign = -sign;
      b = -b;
   }

   mul64x64((uint64_t)a, (uint64_t)b, &high, &low);
   /* Cannot loose information here, because we multiplied signed integers  */
   a = (int64_t)high * sign;

   printf("%"PRId64"  %"PRIu64"\n",a,low);
   /* Mmmh...that doesn't seem right. Why? The high part is off by 2^64! */

   /* We need to do it manually. */
   s = print_high_low_64(a, low);
   printf("%s\n",s);

   /* Clean up */
   free(s);
   exit(EXIT_SUCCESS);
}
/* clang -Weverything -g3 -O3 stack_bigmul.c -o stack_bigmul */

But if you choose a 2ⁿ base it is a bit more flexible. You can exchange the types in the above code with other, smaller ones and make it work on 32-bit and 16-bit MCUs. It is bit more complicated with 8-bit micro controllers, but not that much.

`c = (char)(remainder + 0x30);` should always be written `c = (char)(remainder + '0');`. Digits are guaranteed to be consecutive and positive in the execution charset, which implies that EBCDIC systems must either have the `char` type be unsigned or have more than 8 bits. — chqrlie, Apr 22 '23 at 20:33
@chqrlie `0x30` instead of `'0'` to make the numerical action clearer. Or, if you prefer: for pedagocial reasons (I noted that I assume ASCII). Why the explicitely typed literals? Spend some time in a project where that was compulsory. Bad habits...y'know. — deamentiaemundi, Apr 22 '23 at 21:34
@chqrlie wasn't it always four spaces? But nevertheless: thanks! — deamentiaemundi, Apr 22 '23 at 21:38
The whole program was indented by 4 spaces, which is redundant with the code block \`\`\` markers — chqrlie, Apr 22 '23 at 21:41

How do you express a large integer?

3 Answers3