2
int i, f;
f = scanf("%d", &i);

When I enter 3333333333333333333333 as input (greater than the capacity of int), shouldn't the value of f be 0?

chqrlie
  • 131,814
  • 10
  • 121
  • 189
koil
  • 41
  • 4
  • 5
    `scanf()` and friends don't do overflow checking. – Shawn Jun 29 '22 at 15:28
  • 2
    Forget the idea of using `scanf` for user input and then validate that input, it can't be done and `scanf` has not been designed with this in mind. Read whole lines with `fgets` and then parse the input, possibly using `strtol` or `strtod`. – Jabberwocky Jun 29 '22 at 15:45
  • 3
    Seconding Jabberwocky's comment. `scanf` is great for quick-and-dirty input. But it's absolutely lousy — basically useless — for robust real-world input in the face of possible errors which you'd like to cleanly and reliably detect. – Steve Summit Jun 29 '22 at 15:49
  • 2
    The `scanf` functions -- the entire family -- have _undefined behavior_ on numeric input overflow. Which means: Not only is it not possible to detect this kind of input error, the C library is allowed to _crash your program_ just because the input had too many digits. And I can't _think_ of a bogus assumption that the compiler could derive from this scenario, but I can't rule out the possibility. The `scanf` family is broken as specified and should never be used at all. – zwol Jun 29 '22 at 15:59
  • Try `char ch; int success = scanf("%5d%c",&i, &ch) == 2 && ch == '\n';` – chux - Reinstate Monica Jun 29 '22 at 21:48

3 Answers

2

No, it can't be detected that way.

The below is not a portable solution, but it works in gcc 12.1, clang 14.0 and MSVC 19.32. It may stop working in later releases.

You need to set errno = 0; first and then check it for range errors:

#include <errno.h>

// ...

    // Clear errno first: it is left unchanged when the scan succeeds, so a
    // stale value could otherwise be mistaken for a range error.
    errno = 0;
    f = scanf("%d",&i);
    // Non-portable check: the C standard does not document scanf as reporting
    // overflow through errno (overflow is undefined behavior), so this only
    // works on implementations that happen to set ERANGE.
    if(f == 1 && errno != ERANGE) {
        // success
    }

For portability, read this from an early draft of the C2x standard:

Unless assignment suppression was indicated by a *, the result of the conversion is placed in the object pointed to by the first argument following the format argument that has not already received a conversion result. If this object does not have an appropriate type, or if the result of the conversion cannot be represented in the object, the behavior is undefined.

A better (as in portable) option to detect this would be to read into a char[] buffer first and then use strtol() to convert it to a number. From the same standard draft:

The strtol, strtoll, strtoul, and strtoull functions return the converted value, if any. If no conversion could be performed, zero is returned. If the correct value is outside the range of representable values, LONG_MIN, LONG_MAX, LLONG_MIN, LLONG_MAX, ULONG_MAX, or ULLONG_MAX is returned (according to the return type and sign of the value, if any), and the value of the macro ERANGE is stored in errno.

Here's a demonstrative program using strtol() (which converts to long):

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

// strtoi: strtol() narrowed to the range of `int`.
//
// str, str_end and base are forwarded to strtol() unchanged, so *str_end
// ends up pointing at the first character that was not converted.
//
// Returns the converted value. If the value does not fit in an `int`, the
// result is clamped to INT_MAX or INT_MIN and errno is set to ERANGE.
// In every other case (success, or no conversion performed) errno is put
// back to the value it had when the function was entered.
int strtoi(const char *str, char **str_end, int base) {
    const int saved_errno = errno;
    long value;

    // Start from a clean slate so an ERANGE seen below can only have been
    // produced by this particular strtol() call.
    errno = 0;
    value = strtol(str, str_end, base);

    if (errno == ERANGE) {
        // Outside the range of long already: clamp in the same direction.
        return value == LONG_MAX ? INT_MAX : INT_MIN;
    }
    if (value > INT_MAX) {
        // Fits in long but not in int (possible when long is wider).
        errno = ERANGE;
        return INT_MAX;
    }
    if (value < INT_MIN) {
        errno = ERANGE;
        return INT_MIN;
    }

    // Success or no conversion: leave the caller's errno untouched.
    errno = saved_errno;
    return (int)value;
}
// Element count of a true array (does not work on a pointer).
#define Size(x) (sizeof (x) / sizeof *(x))

// Demo driver: feeds a fixed list of sample strings to strtoi() and reports,
// for each one, whether the value was out of range, missing, followed by an
// unexpected character, or converted cleanly.
int main(void) {
    const char *samples[] = {
        "3333333333333333333333 foo",
        "2147483647 will probably succeed",
        "2147483648 will probably fail",
        "32767 guaranteed success",
        "32767xyz",
        "xyz",
        "123",
        ""
    };
    const unsigned count = Size(samples);
    unsigned idx = 0;

    while (idx < count) {
        const char *text = samples[idx++];
        char *rest; // set by strtoi to where the conversion stopped

        printf("testing \"%s\"\n", text);

        errno = 0; // must be cleared so ERANGE can be attributed to this call
        int value = strtoi(text, &rest, 10);

        if (errno == ERANGE) {
            perror(" to big for an int");
            continue;
        }
        if (rest == text) {
            // nothing was consumed, so there was no number at the start
            fprintf(stderr, " no conversion could be done\n");
            continue;
        }
        if (*rest != '\0' && !isspace((unsigned char)*rest)) {
            // digits were read but something other than whitespace follows
            fprintf(stderr, " conversion ok, but followed by a rouge character\n");
            continue;
        }
        printf(" success: %d rest=[%s]\n", value, rest);
    }
}

Possible output:

testing "3333333333333333333333 foo"
 to big for an int: Numerical result out of range
testing "2147483647 will probably succeed"
 success: 2147483647 rest=[ will probably succeed]
testing "2147483648 will probably fail"
 to big for an int: Numerical result out of range
testing "32767 guaranteed success"
 success: 32767 rest=[ guaranteed success]
testing "32767xyz"
 conversion ok, but followed by a rouge character
testing "xyz"
 no conversion could be done
testing "123"
 success: 123 rest=[]
testing ""
 no conversion could be done
Ted Lyngmo
  • 93,841
  • 5
  • 60
  • 108
  • 2
    I can't agree this is a good suggestion. Setting `errno` to 0 before doing something, and then testing it afterwards, is almost never reliable. For a very few library functions, it's documented as working, but I don't believe `scanf` is one of them. – Steve Summit Jun 29 '22 at 15:40
  • 1
    I think ERANGE is GNU libc thing, not standard. Also, a man page seems unclear about what `scanf` returns in case of range error. So this answer could use more explanation/references. – hyde Jun 29 '22 at 15:42
  • @SteveSummit Other than checking if `i == -1;` (which it's set to in gcc, clang and msvc), checking `errno` is the only way I can think of with `scanf`. You _must_ set it to `0` first though since it'll be unchanged if the scan succeeds. – Ted Lyngmo Jun 29 '22 at 15:42
  • 1
    @TedLyngmo I just tried it on my machine and it didn't work. And, I'm sorry, "the only way I can think of" doesn't sound like a good rationale for an SO answer. – Steve Summit Jun 29 '22 at 15:43
  • I did in fact get `i` as -1, but I wouldn't count on that, either. – Steve Summit Jun 29 '22 at 15:44
  • @SteveSummit I read the [`strtol()`](https://en.cppreference.com/w/c/string/byte/strtol) documentation which [`scanf`](https://en.cppreference.com/w/c/io/fscanf) refers to, and it says `errno` will be set. That's why I went with this solution. It works in gcc, clang and msvc. What do you compile with? – Ted Lyngmo Jun 29 '22 at 15:45
  • 2
    @TedLyngmo the [`scanf`](https://en.cppreference.com/w/c/io/fscanf) documentation you refer to mentions `strtol` and relatives, but it does not say that these functions are _actually_ used nor is `errno` mentioned anywhere. – Jabberwocky Jun 29 '22 at 15:49
  • 1
    @TedLyngmo Sorry if this sounds like a threat — I truly don't mean it to — but the only reason I'm not downvoting is that there's an A/B test running and I don't want to give Stack Overflow the impression that I like the new voting buttons. So you're spared. :-) – Steve Summit Jun 29 '22 at 15:51
  • @Jabberwocky That's true. That's why it's the best I can think of when it comes to `scanf`. – Ted Lyngmo Jun 29 '22 at 15:51
  • 1
    @SteveSummit Phew! :-D I added a disclaimer and a link to a better function to the answer. – Ted Lyngmo Jun 29 '22 at 15:53
  • 2
    @TedLyngmo Thanks! And I hereby remove the downvote I didn't give you! :-) – Steve Summit Jun 29 '22 at 15:55
  • 1
    This works (with `scanf`) only by accident, as a consequence of some specific implementation of `scanf` using `strtol` internally, and -- by pure luck -- not clobbering `errno` with some other value on the way out. `scanf` has full-fledged nasal demon _undefined behavior_ on numeric input overflow, which is one of the two most important reasons why I say "never use `scanf`" every time it comes up (the other being that `%s` is just as dangerous as `gets`). – zwol Jun 29 '22 at 15:57
  • 1
    @SteveSummit best comment seen today :-) – Jabberwocky Jun 29 '22 at 15:57
  • @TedLyngmo In answer to your other question, I'm on a Mac, whose libc is largely bsd-based. (Although, in a further twist, rather than writing and compiling a C program, my tests were only using my homebrew C interpreter, and while I *think* its `errno` handling is realistic, that's another source of uncertainty...) – Steve Summit Jun 29 '22 at 16:00
  • @SteveSummit `errno` must be in thread local storage in newer C versions. Before the notion of threads was added to the language, then it was only an ordinary global. That could affect things I suppose. – Ted Lyngmo Jun 29 '22 at 16:02
  • @zwol Yes, I found the standard quote about UB in these cases and put that in the answer. – Ted Lyngmo Jun 29 '22 at 16:31
  • 1
    @TedLyngmo finally upvoted due to multiple edits – Jabberwocky Jun 29 '22 at 16:32
  • @Jabberwocky ❤️ :-) – Ted Lyngmo Jun 29 '22 at 16:33
  • 1
    @hyde Re: _"I think ERANGE is GNU libc thing"_ - No, it's actually standardized. I added a relevant quote from the latest draft. – Ted Lyngmo Jun 29 '22 at 17:08
  • 1
    This is really not a good answer, [as `scanf()` is free to munge `errno` for any reason](http://www.port70.net/~nsz/c/c11/n1570.html#7.5p3): "The value of errno may be set to nonzero by a library function call whether or not there is an error, provided the use of errno is not documented in the description of the function in this International Standard." While this might appear to work in some limited instances, it can't be relied upon to work in general and may fail at any time in cases that used to "work". – Andrew Henle Jun 29 '22 at 17:33
  • 1
    @AndrewHenle True. I tried to make the portability issue clear and tried to steer OP towards using `strtol` instead. – Ted Lyngmo Jun 29 '22 at 17:36
  • Note that `strtol()` does not restore `errno` unlike this `strtoi()`. For greater symmetry, drop the `int errno_save = errno; errno = 0; ... errno = errno_save;` and let the calling code do `errno = 0;`. [Example](https://stackoverflow.com/a/29378380/2410359). – chux - Reinstate Monica Jun 30 '22 at 09:21
  • @chux-ReinstateMonica `strtol` leaves `errno` as it was when entering `strtol` unless it sets `ERANGE`. The wrapper aims to behave the same way. I need this since I check for `ERANGE` in the function, and to be able to do that, I need to first set `errno = 0;` (unlike `str2subrange`) to be able to return `INT_MIN`/`INT_MAX` (with `errno` set to `ERANGE`). I think that logic holds. – Ted Lyngmo Jun 30 '22 at 10:31
  • Disagree with "I need `errno = 0;`" in `strtoi()` as your usage does `errno = 0;` in `main()`. – chux - Reinstate Monica Jun 30 '22 at 10:37
  • @chux-ReinstateMonica The `strtoi` function doesn't "know" that the user of the function has set `errno = 0` and shouldn't depend on it to be able to mimic `strtol` for `int`s. – Ted Lyngmo Jun 30 '22 at 10:39
  • I see better your goal - yet not in full agreement with the design - to each his own. As `strtol()` may set `errno` for additional reasons than `ERANGE` [e.g.](https://man7.org/linux/man-pages/man3/strtol.3.html) (common, even if that is non-standard behavior), this code could consider only restoring `errno` selectively. e.g. `errno = errno_save;` --> `if (errno == 0) errno = errno_save;`. Also note `strtoi()` is a reserved function name. (I'd UV here for your insights, except we are in competition.) – chux - Reinstate Monica Jun 30 '22 at 10:57
  • @chux-ReinstateMonica _"to each his own"_ - True, :-) Posix may set `EINVAL` but I wanted it to just do what's mentioned in the standard to not trick OP into thinking that actually checking for `EINVAL` would be portable. – Ted Lyngmo Jun 30 '22 at 11:02
2

Shouldnt the value of f be 0?

With standard C, no. With scanf("%d",&i), on int overflow, the result is undefined.

With scanf() in Unix (of which there are variations), I find no prevention of undefined behavior with overflow.

Best to ditch (not use) scanf() and use fgets() for all user input.


Code could try a textual width limit and a wider type:

// Read into the widest signed integer type with a bounded field width, then
// range-check against int before narrowing.
// NOTE(review): `d` is not declared in this fragment; presumably an int
// declared by the surrounding code.
intmax_t bigd;
//          vv --- width limit
if (scanf("%18jd",&bigd) == 1 && bigd >= INT_MIN && bigd <= INT_MAX) {
  // value is known to fit, so the cast cannot change it
  d = (int) bigd;
} else {
  // no conversion happened, or the value is outside the int range
  puts("Oops");
}

Yet that has trouble on novel implementations where int is as wide as intmax_t.


scanf() returns 0 when no int textual input found.

A key design element missing from OP's question is: what should happen to user input that exceeds the int range? Stop reading after the first "333333333"?

What is best, depends on how OP wants to handle, in detail, error conditions - something not yet stated.

chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
1

scanf("%d", &i) does not detect overflow, worse even, scanf() has undefined behavior if the number exceeds the range of the destination type: depending on the implementation, the value of i could be -434809515, -1, 0, INT_MAX or any value including a trap value with or without some undesirable side effects.

The proper way to check the input is to read it as a line in an array of char and to parse it with strtol():

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Read one line from stdin and accept it only if it consists of a single
   integer that fits in an int (base auto-detected by strtol), optionally
   surrounded by white space. Exit status is 0 on success and 1 on any
   rejected input. */
int main() {
    char line[120];
    char *tail;   /* first character strtol did not consume */
    char extra;   /* receives a stray non-blank character, if any */
    long parsed;

    printf("Enter an integer: ");
    if (fgets(line, sizeof line, stdin) == NULL) {
        fprintf(stderr, "missing input\n");
        return 1;
    }

    errno = 0;  /* so that ERANGE below is known to come from strtol */
    parsed = strtol(line, &tail, 0);

    if (tail == line) {
        /* nothing was converted at all */
        fprintf(stderr, "invalid input: %s", line);
        return 1;
    }
    if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX) {
        /* overflowed long, or fits in long but not in int */
        fprintf(stderr, "number too large: %s", line);
        return 1;
    }
    if (sscanf(tail, " %c", &extra) == 1) {
        /* something other than white space follows the number */
        fprintf(stderr, "trailing characters present: %s", line);
        return 1;
    }

    /* parsed was range-checked above, so the narrowing is lossless */
    printf("The number is %d\n", (int)parsed);
    return 0;
}

You can encapsulate these tests in a getint() function:

#include <ctype.h>
#include <limits.h>
#include <stdio.h>

/* Read a decimal int from the stream fp.
   Leading white space is skipped and one optional '+' or '-' is accepted.
   *res is always written:
     return  0  a number was read and fits in int
     return -1  the number is out of range; *res is clamped to INT_MIN or
                INT_MAX and the remaining digits are still consumed
     return -2  no digit was found (not a number, or end of file); *res is 0
   Only the characters that are needed are consumed, like scanf: the first
   character after the number is pushed back with ungetc. A sign that is not
   followed by a digit stays consumed.
*/
int getint(FILE *fp, int *res) {
    int value = 0;
    int status = 0;
    int negative = 0;
    int ch;

    /* skip leading white space */
    do {
        ch = getc(fp);
    } while (isspace(ch));

    /* optional sign */
    if (ch == '-') {
        negative = 1;
        ch = getc(fp);
    } else if (ch == '+') {
        ch = getc(fp);
    }

    if (!isdigit(ch)) {
        status = -2;
    } else if (negative) {
        /* accumulate downwards so that INT_MIN itself is representable */
        for (; isdigit(ch); ch = getc(fp)) {
            int step = '0' - ch;
            if (value > INT_MIN / 10
                || (value == INT_MIN / 10 && step >= INT_MIN % 10)) {
                value = value * 10 + step;
            } else {
                value = INT_MIN;
                status = -1;
            }
        }
    } else {
        for (; isdigit(ch); ch = getc(fp)) {
            int step = ch - '0';
            if (value < INT_MAX / 10
                || (value == INT_MAX / 10 && step <= INT_MAX % 10)) {
                value = value * 10 + step;
            } else {
                value = INT_MAX;
                status = -1;
            }
        }
    }

    /* give back the character that ended the scan */
    if (ch != EOF)
        ungetc(ch, fp);

    *res = value;
    return status;
}

/* Prompt for one integer on stdin, parse it with getint() and report the
   outcome on stdout. Always exits with status 0. */
int main() {
    int i, res;

    printf("Enter an integer: ");
    res = getint(stdin, &i);
    switch (res) {
    case 0:
        /* End the line like the other two branches do, so the last line of
           output is properly terminated. */
        printf("The number is %d.\n", i);
        break;
    case -1:
        /* i holds the clamped value (INT_MIN or INT_MAX) */
        printf("Number out of range: %d, res=%d.\n", i, res);
        break;
    default:
        /* getint() found no digit */
        printf("Invalid or missing input, res=%d.\n", res);
        break;
    }
    return 0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189