5

An absolute difference would be the absolute value of the difference between 2 numbers. Suppose I have 2 int variables (x and y) and I would like to find the absolute difference. An easy solution would be:

unsigned diff = abs(x-y);

However these invoke undefined behavior and give incorrect results if overflow occurs such as if x is INT_MIN and y is INT_MAX. This returns 1 (assuming wraparound behavior) instead of UINT_MAX as expected.

user16217248
  • 3,119
  • 19
  • 19
  • 37

4 Answers4

7

Alright, the following works. @user16217248 got me started. See the discussion under that answer.

How to safely and efficiently find abs((int)num1 - (int)num2)

/// Safely and efficiently return `abs((int)num1 - (int)num2)`
unsigned int abs_num1_minus_num2_int(int num1, int num2)
{
    unsigned int abs_diff = num1 > num2 ?
        (unsigned int)num1 - (unsigned int)num2 :
        (unsigned int)num2 - (unsigned int)num1;

    return abs_diff;
}

The secret is to do the num1 > num2 ternary comparison with signed integer values, but then to reinterpret cast them to unsigned integer values to allow for well-defined overflow and underflow behavior when getting the absolute value of num1 - num2.

Here is my full test code:

absolute_value_of_num1_minus_num2.c from my eRCaGuy_hello_world repo:

///usr/bin/env ccache gcc -Wall -Wextra -Werror -O3 -std=gnu17 "$0" -o /tmp/a -lm && /tmp/a "$@"; exit
// For the line just above, see my answer here: https://stackoverflow.com/a/75491834/4561887

#include <limits.h>
#include <stdbool.h> // For `true` (`1`) and `false` (`0`) macros in C
#include <stdint.h>  // For `uint8_t`, `int8_t`, etc.
#include <stdio.h>   // For `printf()`


#define TEST_EQ(func, num1, num2, equals) \
    printf("%s\n", func((num1), (num2)) == (equals) ? "Passed" : "FAILED")

/// Safely and efficiently return `abs((int8_t)num1 - (int8_t)num2)`
uint8_t abs_num1_minus_num2_int8(int8_t num1, int8_t num2)
{
    // Note: due to implicit type promotion rules, rule 2 in my answer here
    // (https://stackoverflow.com/a/72654668/4561887) applies, and both the `>`
    // comparison, as well as subtraction, take place below as `int` types.
    // While signed `int` overflow and underflow is undefined behavior, none of
    // that occurs here.
    // - It's just useful to understand that even though we are doing
    //   `(uint8_t)num1 -(uint8_t)num2`, the C compiler really sees it as this:
    //   `(int)(uint8_t)num1 - (int)(uint8_t)num2`.
    // - This is because all small types smaller than `int`, such as `uint8_t`,
    //   are first automatically implicitly cast to `int` before any
    //   mathematical operation or comparison occurs.
    // - The C++ compiler therefore sees it as this:
    //   `static_cast<int>(static_cast<unsigned char>(num1)) - static_cast<int>(static_cast<unsigned char>(num2))`.
    //   - Run this code through https://cppinsights.io/ to see that.
    //     See here: https://cppinsights.io/s/bfc425f6 --> and click the play
    //     button.
    uint8_t abs_diff = num1 > num2 ?
        (uint8_t)num1 - (uint8_t)num2 :
        (uint8_t)num2 - (uint8_t)num1;

    // debugging
    printf("num1 = %4i (%3u); num2 = %4i (%3u); num1-num2=%3u;  ",
        num1, (uint8_t)num1, num2, (uint8_t)num2, abs_diff);

    return abs_diff;
}

/// Safely and efficiently return `abs((int)num1 - (int)num2)`
unsigned int abs_num1_minus_num2_int(int num1, int num2)
{
    unsigned int abs_diff = num1 > num2 ?
        (unsigned int)num1 - (unsigned int)num2 :
        (unsigned int)num2 - (unsigned int)num1;

    // debugging
    printf("num1 = %11i (%10u); num2 = %11i (%10u); num1-num2=%10u;  ",
        num1, (unsigned int)num1, num2, (unsigned int)num2, abs_diff);

    return abs_diff;
}


int main()
{
    printf("Absolute difference tests.\n");

    // ---------------
    // int8_t types
    // ---------------

    int8_t num1_8;
    int8_t num2_8;

    printf("\n");
    printf("INT8_MIN = %i, INT8_MAX = %i\n", INT8_MIN, INT8_MAX); // -128, 127

    num1_8 = -7;
    num2_8 = -4;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 3);

    num1_8 = INT8_MIN;
    num2_8 = INT8_MAX;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, UINT8_MAX);

    num1_8 = INT8_MAX;
    num2_8 = INT8_MIN;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, UINT8_MAX);

    num1_8 = 100;
    num2_8 = 10;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 90);

    num1_8 = 10;
    num2_8 = 100;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 90);

    num1_8 = 10;
    num2_8 = 10;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 0);

    num1_8 = INT8_MAX;
    num2_8 = 1;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 126);

    num1_8 = 1;
    num2_8 = INT8_MAX;
    TEST_EQ(abs_num1_minus_num2_int8, num1_8, num2_8, 126);

    // ---------------
    // int types
    // ---------------

    int num1;
    int num2;

    printf("\n");
    printf("INT_MIN = %i, INT_MAX = %i\n", INT_MIN, INT_MAX); // -2147483648, 2147483647

    num1 = -7;
    num2 = -4;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 3);

    num1 = INT_MIN;
    num2 = INT_MAX;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, UINT_MAX);

    num1 = INT_MAX;
    num2 = INT_MIN;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, UINT_MAX);

    num1 = 100;
    num2 = 10;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 90);

    num1 = 10;
    num2 = 100;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 90);

    num1 = 10;
    num2 = 10;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 0);

    num1 = INT_MAX;
    num2 = 1;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 2147483646);

    num1 = 1;
    num2 = INT_MAX;
    TEST_EQ(abs_num1_minus_num2_int, num1, num2, 2147483646);


    return 0;
}

Sample run and output:

eRCaGuy_hello_world/c$ ./absolute_value_of_num1_minus_num2.c
Absolute difference tests.

INT8_MIN = -128, INT8_MAX = 127
num1 =   -7 (249); num2 =   -4 (252); num1-num2=  3;  Passed
num1 = -128 (128); num2 =  127 (127); num1-num2=255;  Passed
num1 =  127 (127); num2 = -128 (128); num1-num2=255;  Passed
num1 =  100 (100); num2 =   10 ( 10); num1-num2= 90;  Passed
num1 =   10 ( 10); num2 =  100 (100); num1-num2= 90;  Passed
num1 =   10 ( 10); num2 =   10 ( 10); num1-num2=  0;  Passed
num1 =  127 (127); num2 =    1 (  1); num1-num2=126;  Passed
num1 =    1 (  1); num2 =  127 (127); num1-num2=126;  Passed

INT_MIN = -2147483648, INT_MAX = 2147483647
num1 =          -7 (4294967289); num2 =          -4 (4294967292); num1-num2=         3;  Passed
num1 = -2147483648 (2147483648); num2 =  2147483647 (2147483647); num1-num2=4294967295;  Passed
num1 =  2147483647 (2147483647); num2 = -2147483648 (2147483648); num1-num2=4294967295;  Passed
num1 =         100 (       100); num2 =          10 (        10); num1-num2=        90;  Passed
num1 =          10 (        10); num2 =         100 (       100); num1-num2=        90;  Passed
num1 =          10 (        10); num2 =          10 (        10); num1-num2=         0;  Passed
num1 =  2147483647 (2147483647); num2 =           1 (         1); num1-num2=2147483646;  Passed
num1 =           1 (         1); num2 =  2147483647 (2147483647); num1-num2=2147483646;  Passed

Adjacently related

  1. The "absolute subtraction" above reminds me of the "rounding divide" set of solutions you can do with integer math too. For that, see my other answer here: Rounding integer division (instead of truncating). I present rounding up, rounding down, and rounding to nearest when doing integer division.

See also

  1. My answer on implicit casting/promotion and Integer and floating point rank and promotion rules in C and C++
  2. https://cppinsights.io/ - a very useful tool which expands your C++ code into exactly what the compiler sees, including after applying all automatic implicit type promotion rules in the compiler.
    1. Ex: see my code above here: https://cppinsights.io/s/bfc425f6 --> then click the play button to convert and expand it into what the compiler sees.
Gabriel Staples
  • 36,492
  • 15
  • 194
  • 265
  • 1
    `uint8_t abs_num1_minus_num2_int8(int8_t num1, int8_t num2) { uint8_t abs_diff = num1 > num2 ? (uint8_t)num1 - (uint8_t)num2 : (uint8_t)num2 - (uint8_t)num1; return abs_diff; }` appears to not trust the compiler to emit efficient code. `uint8_t abs_num1_minus_num2_int8(int8_t num1, int8_t num2) { return abs(num1 - num2);` is sufficient. – chux - Reinstate Monica Jun 07 '23 at 16:14
  • 1
    8-bit `num1, num2` are promoted to `int` before the subtraction and `num1 - num2` does not overflow. `abs()` good enough for 8-bit type differences. – chux - Reinstate Monica Jun 07 '23 at 16:25
  • @chux-ReinstateMonica, on the `abs_num1_minus_num2_int8()` function, agreed. On the `abs_num1_minus_num2_int()` function, what I've done still has merit, no? My 8-bit version was just so I could have smaller, easily-digested and manually checked, numbers I could use for unit testing, as a test case to prepare for the `int` and larger versions. That was my motivation for writing the 8-bit version at all. It was so I could manually check it and ensure I could trust my process for larger-bit versions. – Gabriel Staples Jun 07 '23 at 18:54
  • 1
    `abs_num1_minus_num2_int()` looks OK. My [comment](https://stackoverflow.com/questions/76419734/how-could-i-safely-find-the-absolute-difference-between-2-signed-integers-in-c/76420269?noredirect=1#comment134761284_76420269) was for `abs_num1_minus_num2_int8()`. IMO, it distracts from an otherwise fine answer as it ventures into narrower types than `int` - and those deserve additional considerations. – chux - Reinstate Monica Jun 07 '23 at 19:55
  • I'm not convinced there's any benefit to assigning `abs_diff` before immediately returning it. Why not simply `return num1 > num2 ? (unsigned int)num1 - (unsigned int)num2 : (unsigned int)num2 - (unsigned int)num1;`? – Toby Speight Jun 08 '23 at 06:48
  • @TobySpeight, because it looks better. It's for the reader, not the compiler. It creates self-documenting code where a human reader can look at the variable name to quickly figure out what is being returned and how it was created. I do it in virtually all of my functions. When others do it, I enjoy reading their code more and find it easier to read and understand. It is a habit which also makes the type crystal clear, and is particularly valuable in C++ with polymorphic objects and in place of implicit casting of long and complicated types after a return statement, leaving a reader wondering. – Gabriel Staples Jun 08 '23 at 14:07
  • I guess it's a taste thing; I find your style less readable. More variables to hold onto. – Toby Speight Jun 08 '23 at 14:14
  • @TobySpeight, in this simple function, either is fine. But yeah, it's a taste thing. In C++ where you have `status_or` types, such as `status_or>`, that can be an `error_status` or a `std::initializer_list` or something, I want to instantly know whether (and where) an `error_status` or a `std::initializer_list` is being returned, so doing it this way can be far more readable. – Gabriel Staples Jun 08 '23 at 14:21
3

A simple solution to this problem is to avoid overflow entirely by always subtracting the smaller one from the bigger one. This gives the expected results, even for x == INT_MIN and y == INT_MAX. The signed to unsigned conversion here is safe:

unsigned diff = x > y ? (unsigned)x-(unsigned)y : (unsigned)y-(unsigned)x;

Edit: In order for the subtraction to be guaranteed to not cause signed overflow in cases of the 'smaller' one being less than zero, the operands must be cast to unsigned.

user16217248
  • 3,119
  • 19
  • 19
  • 37
  • 5
    This is undefined behavior. If y is `INT_MAX` and x is anything < 0, then `INT_MAX - negative_number` overflows, which is undefined behavior. Also, your signed to unsigned conversion is _not_ safe, and the answer you cite relies on something different than your answer--namely, the fact that in that answer, the first operand, `u`, is an `unsigned int`, resulting in unsigned integer math. In your answer, all math is signed integer math, and undefined behavior if it overflows or underflows. You get overflow for large values minus negative values, and underflow for small values minus large values. – Gabriel Staples Jun 07 '23 at 04:38
  • 1
    @GabrielStaples Oops, thanks for bringing this to my attention. Trickier problem than I originally thought. – user16217248 Jun 07 '23 at 04:40
  • 2
    You got me started [in my answer](https://stackoverflow.com/a/76420269/4561887) at least. Thanks for that. You had some right ideas. – Gabriel Staples Jun 07 '23 at 06:11
  • 1
    @GabrielStaples I had the idea of conditionally subtracting the smaller one from the bigger one, and you realized that this only works for all `x` and `y` if I cast the operands to `unsigned` when subtracting. – user16217248 Jun 07 '23 at 06:16
  • 2
    Upvoted, now that you've got the fix in place. – Gabriel Staples Jun 07 '23 at 18:08
1

You can use stdint.h types, do the difference and then take the absolute value and convert to int. Something like this:

int32_t a,b,res;
...

int_least64_t diff = (int_least64_t)a - (int_least64_t)b; // int_least64_t to be sure
int_least64_t absDiff = llabs(diff);

// Finally 
res = absDiff > INT_MAX ? INT_MAX : (int32_t)absDiff;

jCres
  • 41
  • 5
0

For those who like to avoid casting, a variation on @Gabriel Staple answer.

unsigned abs_num1_minus_num2_int(int num1, int num2) {
    unsigned abs_diff = num1 > num2 ?
        0u + num1 - num2 :
        0u + num2 - num1;
    return abs_diff;
}

Note: Pedantic warnings may still arise from an int casually changing to an unsigned.

chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
  • 1
    Personally, I'd rather have casts (which tell me the conversion is intentional) than mixed signed/unsigned arithmetic and implicit narrowing, and the consequent warnings from compilation and static analysis that need to be hand-checked and commented. But tastes evidently vary.... – Toby Speight Jun 08 '23 at 06:52
  • @TobySpeight What I would like is code that would move the _signed_ type to its corresponding _unsigned_ type, from the original be `int`, `long`, `long long`, ... I suppose `generic` could handle that. E.g.: `tounsigned(num1) - tounsigned(num2)`. – chux - Reinstate Monica Jun 08 '23 at 13:12
  • This also costs the execution time of the `add` instructions if the optimizations are unavailable or disabled. – user16217248 Jun 16 '23 at 23:16