1

I am contacting you because I need to code the functions realloc / strlen (signed and unsigned) / memcpy. I have recoded these functions but it's doens't working and now I have valgrind errors like Conditional jump or move depends on uninitialised value(s). Can you help me to fix the problem?

Here are the functions:

void *my_realloc(void *ptr, size_t size)
{
    unsigned char *old_ptr = (unsigned char *)ptr;
    void *new_ptr = NULL;
    size_t old_size = 0;
    if (!ptr) {
        return malloc(size);
    }
    if (size == 0) {
        free(ptr);
        return NULL;
    }
    old_size = my_strlen_unsigned(old_ptr) + 1;
    new_ptr = malloc(size);
    if (!new_ptr) {
        return NULL;
    }
    my_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
    free(ptr);
    return new_ptr;
}
void *my_memcpy(void *restrict dest, const void *restrict src, size_t n)
{
    if (!dest || !src) return NULL;
    unsigned char *d = dest;
    const unsigned char *s = src;
    size_t i = 0;
    while (i < n && i < my_strlen_unsigned(s)) {
        *d = *s;
        d++;
        s++;
        i++;
    }
    return dest;
}
size_t my_strlen_unsigned(const unsigned char *s)
{
    size_t count = 0;
    if (s != NULL) {
        while (*s != 0) {
            count++;
            s++;
        }
    }
    return count;
}
size_t my_strlen(const char *s)
{
    size_t count = 0;
    if (s != NULL) {
        while (*s != 0) {
            count++;
            s++;
        }
    }
    return count;
}

Currently I test these functions via the following functions:

char *my_str_clean(char *str)
{
    char *ptr = str;
    char *new_str = malloc(1);
    size_t i = 0;
    if (!new_str)
        return (NULL);
    while (*ptr) {
        if (*ptr != ' ' && *ptr != '\t' && *ptr != '\n') {
            new_str = my_realloc(new_str, (sizeof(char) * (i + 2)));
            new_str[i] = *ptr;
            i++;
        }
        ptr++;
    }
    new_str[i] = '\0';
    free(str);
    return (new_str);
}

int main(int argc, char **argv, char **env)
{
    char *test = malloc(15);
    test[0] = 'l';
    test[1] = 's';
    test[2] = ' ';
    test[3] = ' ';
    test[4] = ' ';
    test[5] = ' ';
    test[6] = ' ';
    test[7] = ' ';
    test[8] = ' ';
    test[9] = '-';
    test[10] = 'l';
    test[11] = ' ';
    test[12] = '-';
    test[13] = 'a';
    test[14] = '\0';
    char *clean = NULL;
    clean = my_str_clean(test);
    printf("%s\n", clean);
    free(clean);
}

Here is the valgrid report :

==28637== Memcheck, a memory error detector
==28637== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==28637== Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info
==28637== Command: ./mysh
==28637== 
==28637== Conditional jump or move depends on uninitialised value(s)
==28637==    at 0x109BB2: my_strlen_unsigned (my_strlen_unsigned.c:14)
==28637==    by 0x109B2B: my_realloc (my_realloc.c:35)
==28637==    by 0x1096B5: my_str_clean (my_str_clean.c:19)
==28637==    by 0x10954A: main (minishell.c:55)
==28637==  Uninitialised value was created by a heap allocation
==28637==    at 0x4841888: malloc (vg_replace_malloc.c:381)
==28637==    by 0x109660: my_str_clean (my_str_clean.c:13)
==28637==    by 0x10954A: main (minishell.c:55)
==28637== 
==28637== Conditional jump or move depends on uninitialised value(s)
==28637==    at 0x109BB2: my_strlen_unsigned (my_strlen_unsigned.c:14)
==28637==    by 0x109ABC: my_memcpy (my_memcpy.c:16)
==28637==    by 0x109B73: my_realloc (my_realloc.c:40)
==28637==    by 0x1096B5: my_str_clean (my_str_clean.c:19)
==28637==    by 0x10954A: main (minishell.c:55)
==28637==  Uninitialised value was created by a heap allocation
==28637==    at 0x4841888: malloc (vg_replace_malloc.c:381)
==28637==    by 0x109660: my_str_clean (my_str_clean.c:13)
==28637==    by 0x10954A: main (minishell.c:55)
==28637== 
==28637== Conditional jump or move depends on uninitialised value(s)
==28637==    at 0x1097A5: my_strlen (my_strlen.c:18)
==28637==    by 0x10962C: my_put_str (my_put_str.c:21)
==28637==    by 0x10955F: main (minishell.c:56)
==28637==  Uninitialised value was created by a heap allocation
==28637==    at 0x4841888: malloc (vg_replace_malloc.c:381)
==28637==    by 0x109B3F: my_realloc (my_realloc.c:36)
==28637==    by 0x1096B5: my_str_clean (my_str_clean.c:19)
==28637==    by 0x10954A: main (minishell.c:55)
==28637== 
l==28637== 
==28637== HEAP SUMMARY:
==28637==     in use at exit: 0 bytes in 0 blocks
==28637==   total heap usage: 8 allocs, 8 frees, 43 bytes allocated
==28637== 
==28637== All heap blocks were freed -- no leaks are possible
==28637== 
==28637== For lists of detected and suppressed errors, rerun with: -s
==28637== ERROR SUMMARY: 18 errors from 3 contexts (suppressed: 0 from 0)
Virgil G.
  • 75
  • 6
  • What is `my_strlen_unsigned`? Is the data guaranteed to be a "string" that can be reliably counted by that function? – Some programmer dude Feb 16 '23 at 10:57
  • *"the realloc function must have the same prorotype as the official function and work for any pointer type"* - well then you definitely cannot assume that the pointer is a `unsigned char *` pointing to a valid terminated C string, can you? In other words, `my_strlen_unsigned` cannot be used. – Marco Bonelli Feb 16 '23 at 10:59
  • 1
    @VirgilG. you cannot. You will have to pass the old size to the function as well. The way `malloc` does it is that the metadata of the chunk (size and other things) is stored right before the chunk, so internally it knows the size even without explicitly passing it. However, you do not have access to this information. If you want to write your own allocator, you should probably re-implement `malloc` too in such a way that you can store the size somewhere in the chunk. – Marco Bonelli Feb 16 '23 at 11:02
  • See [this answer of mine](https://stackoverflow.com/a/62096458/3889449) for more information. You should be able to query the size using `malloc_usable_size()`, but it's a GNU extension, so if you use it the code will not be portable and will only work on system using glibc (GNU libc). – Marco Bonelli Feb 16 '23 at 11:03
  • Ok, thanks a lot, but can you write me the right realloc / memcpy function that doesn't use unsigned char * and that uses malloc_usable_size() ? – Virgil G. Feb 16 '23 at 11:06
  • I would recommend that you look at the musl C implementation. It is quite small and neat. glibc is complicated by having to support many OSes and platforms – Paul Floyd Feb 16 '23 at 14:31

2 Answers2

2

Invalid assumption:

void *my_realloc(void *ptr, size_t size)
{
    unsigned char *old_ptr = (unsigned char *)ptr;

You're breaking the realloc() specification here. realloc() doesn't assume that the ptr would always be pointing to a string. It is type-agnostic.

And the cast is redundant. There's an implicit conversion from void * to any other pointer type.


Undefined behaviour:

old_size = my_strlen_unsigned(old_ptr) + 1;

The standard, or your my_strlen_unsigned version of it, works with strings, i.e. the pointer is assumed to be pointing to an array of char terminated by a null-byte. An array of ints will not be terminated with a null-byte. Your my_realloc() thus invokes undefined behaviour.


Possible fix:

You can't determine the old size of the block pointed to by ptr and reimplement realloc(), without at least reimplementing your own malloc() and free(), portably. But you can take the old size as the third argument. (But the standard realloc() only takes two. So would this be a conforming implementation?)

Here's glibc's implementation of it: malloc().c

Here's musl C implementation of it: malloc().c

And here's my toy example that attempts to emulate the standard realloc() to some extent:

/** 
*    @brief The my_realloc() function shall deallocate the old object pointed to
*           by ptr and return a pointer to a new object that has the size specified by new_size.
*
*    @param ptr - A pointer to a block of memory to resize.
*    @param old_size - The size of the block pointed to by ptr.
*    @param new_size - The size to resize by.
*
*    If ptr is a null pointer, my_realloc() shall be equivalent to
*    malloc() for the specified new_size.
*
*    If ptr does not match a pointer returned earlier by calloc(),
*    malloc(), or realloc() or if the space has previously been
*    deallocated by a call to free() or realloc(), the behavior is undefined.
*    
*    @return Upon successful completion, my_realloc() shall return a pointer to the moved allocated space.  
*            If size and ptr both evaluate to 0, my_realloc() shall return a 
*            NULL pointer with errno set to [EINVAL].
*            If there is not enough available memory, my_realloc() shall return a
*            NULL pointer and set errno to [ENOMEM].
*            
*            If my_realloc() returns a NULL pointer, the memory referenced by ptr shall not be changed.
*
*    @warning my_realloc() may return NULL to indicate an error. For that reason, a different pointer variable 
*             must be used to hold it's return value. Otherwise, you risk overwriting the original ptr with NULL and 
*             losing your only reference to the original block of memory.
*/ 
              
void *my_realloc (void *ptr, size_t new_size, size_t old size) 
{
    if (!ptr) {
        return malloc (new_size);
    }
    
    if (!new_size) {
        errno = EINVAL;
        return 0;
    }
    
    if (new_size <= old_size) {
        return ptr;
    }

   /* As a last resort, allocate a new chunk and copy to it. 
    */
    void *new = 0;
    if (new_size > old_size) {
        new = malloc (new_size);
        if (!new) {
            return 0;
        }
        memcpy (new, ptr, old_size);
        free (ptr);
    }
    return new;
}

You can also find a sample implementation in chapter 8 of K&R.


Side-notes:

char *new_str = malloc(1);
new_str = my_realloc(..);

You risk losing access to the original memory allocated through malloc() here. If my_realloc() returned NULL, new_str would be assigned it's result and you would cause a memory leak in your program.

Furthermore, the memory returned by malloc() is uninitialized. And your code invokes undefined behaviour by calling my_strlen() underneath my_realloc() on a pointer that is uninitialized. Hence the warnings.


Harith
  • 4,663
  • 1
  • 5
  • 20
1

Your are assuming that data in the chunk is a correctly terminated C string, and using my_strlen_unsigned() to calculate the old size. You cannot do this since you cannot know what kind of data is stored in a chunk. The only real solution is to also remember the chunk size somehow, and pass it as parameter to your function.

If you cannot do this, there might be some other ways to get around the problem. For example, the malloc_usable_size() function is a GNU extension available in glibc (GNU libc) that can be used to query the size of an existing chunk, so you can use that instead of your my_strlen_unsigned(). Note however that if you use dynamic linking (the default when compiling) this will make your program non-portable and will only work on systems using glibc. You may wish to link your program statically in this case.

Assuming that the other functions in your code (such as my_memcpy()) are correctly implemented, a correct implementation of my_realloc() would be as follows:

void *my_realloc(void *ptr, size_t size)
{
    void *new_ptr;
    size_t old_size;

    if (!ptr)
        return malloc(size);

    if (size == 0) {
        free(ptr);
        return NULL;
    }

    new_ptr  = ptr;
    old_size = malloc_usable_size(ptr);
    
    if (size != old_size) {
        new_ptr = malloc(size);
        if (!new_ptr)
            return NULL;

        my_memcpy(new_ptr, ptr, old_size < size ? old_size : size);
        free(ptr);
    }

    return new_ptr;
}
Marco Bonelli
  • 63,369
  • 21
  • 118
  • 128