1

I have small program in C (substring):

char *str_sub(char *string, int from, int to) {
    assert(to < 0 || from < to);

    if (!(to < strlen(string))) {
        printf("%d %ld\n", to, strlen(string));
    }

    assert(from < strlen(string) && to < strlen(string));

    char *result = (char *) calloc(to - from + 1, 1);

    memcpy(result, &string[from], to);
    result[to] = '\0';

When I pass -1 to to, I want to make the function match to rest of the string, but that doesn't matter. Do you see the second assert? When I pass -1 to to, it will raise error and in the above condition it also says false, but it's just -1 and 12, and as we know from school, -1 < 12.

So where's the problem?

R.O.S.S
  • 605
  • 5
  • 18
  • 6
    strlen returns size_t which is unsigned and therefore -1 will be converted and [-1 is always unsigned max](http://stackoverflow.com/a/22801135/1708801). Very similar to this [sizeof case here](http://stackoverflow.com/a/22047204/1708801). – Shafik Yaghmour Oct 05 '15 at 14:05

4 Answers4

1

You're comparing variables with different signedness, so the representation of -1 is not uniform between them. See also @Shafik Yaghmour's SO answer referenced in the comments.

I can show the warnings from the compiler like so:

$ clang -Wsign-compare -c sign.c
sign.c:9:14: warning: comparison of integers of different signs: 'int' and 'unsigned long' [-Wsign-compare]
    if (!(to < strlen(string))) {
          ~~ ^ ~~~~~~~~~~~~~~
sign.c:13:17: warning: comparison of integers of different signs: 'int' and 'unsigned long' [-Wsign-compare]
    assert(from < strlen(string) && to < strlen(string));
           ~~~~ ^ ~~~~~~~~~~~~~~
/usr/include/assert.h:89:5: note: expanded from macro 'assert'
  ((expr)                                                               \
    ^
sign.c:13:40: warning: comparison of integers of different signs: 'int' and 'unsigned long' [-Wsign-compare]
    assert(from < strlen(string) && to < strlen(string));
                                    ~~ ^ ~~~~~~~~~~~~~~
/usr/include/assert.h:89:5: note: expanded from macro 'assert'
  ((expr)                                                               \
    ^

$ cat sign.c
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

char *str_sub(char *string, int from, int to) {
    assert(to < 0 || from < to);

    if (!(to < strlen(string))) {
        printf("%d %ld\n", to, strlen(string));
    }

    assert(from < strlen(string) && to < strlen(string));

    char *result = (char *) calloc(to - from + 1, 1);

    memcpy(result, &string[from], to);
    result[to] = '\0';

    return result;
}
Brian Cain
  • 14,403
  • 3
  • 50
  • 88
1

A many have said, to < strlen(string) compares (size_t)-1 < strlen(string). Since size_t is some unsigned type, (size_t)-1 is a large positive value and this compare regularly fails. @Shafik Yaghmour

To fix: take into account size_t.

char *str_sub(char *string, int from, int to) {
    assert(to < 0 || from < to);
    assert(from >= 0);

    size_t length = strlen(string);
    size_t uto = to < 0 ? length : to; 

    assert(from < uto);

    if (!(uto < strlen(string))) {
        printf("%zu %zu\n", uto, strlen(string));
    }

    assert(from < length && uto < length);

    char *result = malloc(uto - from + 1);

    // memcpy(result, &string[from], to);
    memcpy(result, &string[from], uto - from);
    // result[to] = '\0';
    result[uto - from] = '\0';
    return result;
}

Note: rather than all these assert(), suggest defining operation for all combinations of from,to.

Rather than int for from and to, use size_t and create #define MATCH_REST ((size_t)-1). Example:

#define  MATCH_REST ((size_t)-1)

char *str_sub(const char *string, size_t from, size_t to) {
  size_t length = strlen(string);
  if (to > length) to = length;
  if (from > to) from = to;

  size_t diff = to - from;
  char *result = malloc(diff + 1);
  if (result) {
    memcpy(result, &string[from], diff);
    result[diff] = '\0';
  }
  return result;
}
Community
  • 1
  • 1
chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
0

strlen returns a size_t (unsigned-related) variable, and before any comparision, both variables must have the same type. So, one of them must be cast to the other.

In your example, I'm sure to has been casted to unsigned, causing overflow (-1), and probably, you are comparing strlen(string) to the max representable unsigned integer (the probable result of the overflow).

So, the solution is:

char *str_sub(char *string, int from, int to) {
  assert(to < 0 || from < to);

  if (!(to < (int)strlen(string))) {
    printf("%d %ld\n", to, strlen(string));
  }

  assert(from < strlen(string) && to < (int)strlen(string));

  char *result = (char *) calloc(to - from + 1, 1);

  memcpy(result, &string[from], to);
  result[to] = '\0';

Or, to avoid recalculations:

char *str_sub(char *string, int from, int to) {
  int length = strlen(string);

  assert(to < 0 || from < to);

  if (!(to < length)) {
    printf("%d %ld\n", to, length);
  }

  assert(from < length && to < length);

  char *result = (char *) calloc(to - from + 1, 1);

  memcpy(result, &string[from], to);
  result[to] = '\0';

Or, to do safe castings, for too long strings (thanks for remarking it, @chux):

char *str_sub(char *string, int from, int to) {
  size_t length = strlen(string);

  assert(length > 0);
  assert(to < 0 || from < to);

  if (!(to > 0 && to < length))
    printf("%d %lu\n", to, length);

  assert(from < length);
  assert(to < 0 || to < length);

  char *result = (char *) calloc(to - from + 1, 1);

  memcpy(result, &string[from], to);
  result[to] = '\0';
ABu
  • 10,423
  • 6
  • 52
  • 103
0

The compare operator assumes that both variables you compare are of the same type. This beeing not the case, the binary representation of "-1" unsigned as a signed variable is quite not "-1", but instead the max value of the variable.

Magisch
  • 7,312
  • 9
  • 36
  • 52