-1

In the context of an assignment using C, where I have to create a li clone, I am trying to access all of the names of the child files in my directory.

By popular demand, here is the minimum code to reproduce this problem (please consider removing your downvotes...) (I wasn't kidding when I said it was a lot).

To start, have this file structure somewhere (.txt files have keyboard juggling) :

var
├── file1.txt
└── var2
    ├── test -> ../../test
    ├── var3
    │   ├── file2.txt
    │   └── file3.txt
    └── varfind -> ../

4 directories, 3 files

Code to reproduce main.c

#include <stdio.h>
#include <stdlib.h>

#include "helpers.h"

int main(int argc, char **argv) {
    char *destination = malloc(sizeof(char[THEORETICAL_MAX_PATH_LENGTH])); 
    switch (argc) {
        case 1: // (== Error case)
            fprintf(stderr, "Usage: %s file\n", argv[0]);
            exit(EXIT_FAILURE);
            break;
        case 2: // (== List)
            stripSlash(argv[argc - 1], destination);
            Object* obj =  createNode(destination, NULL);
            freeNode(obj);
            break;
        default:
            exit(0);
            break;
    }
    free(destination);
}

helpers.h

#ifndef HEADER_UTILITIES
#define HEADER_UTILITIES
#define THEORETICAL_MAX_PATH_LENGTH 4096

#include <sys/stat.h>

typedef struct {
    int numberOfChildren;
    char path[2][THEORETICAL_MAX_PATH_LENGTH];
    struct stat info;
    struct Object **children; // child[0] is parent.
} Object;

void stripSlash(char arg[], char *filename);

void freeNode(Object *node);
Object * createNode(const char *filename, Object *parent);

#endif

helpers.c

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <errno.h>
#include <dirent.h>
#include "helpers.h"

void stripSlash(char arg[], char *filename) {
    strncpy(filename, arg, THEORETICAL_MAX_PATH_LENGTH);
    if (filename[strlen(filename) - 1] == '/')
        filename[strlen(filename) - 1] = '\0';
}

void getChildren(const char *path, struct stat* statBuffer, char *files[THEORETICAL_MAX_PATH_LENGTH], int *numberOfFiles) {
    int count = 0;
    struct dirent *currentDir;
    if (lstat(path, statBuffer) == 0 && S_ISDIR(statBuffer->st_mode)) {
        DIR *folder = opendir(path);
        if (access(path, F_OK) != -1) {
            if (folder)
                while ((currentDir = readdir(folder))) {
                    if (strcmp(currentDir->d_name, ".") && strcmp(currentDir->d_name, "..")) {
                        files[count] = (char*) malloc(THEORETICAL_MAX_PATH_LENGTH);
                        snprintf(files[count], THEORETICAL_MAX_PATH_LENGTH, "%s", currentDir->d_name);
                        count++;
                    }
                }
            free(currentDir);
        }
        closedir(folder);
    } else if (errno < 0)
        printf("ERROR %d\n", errno);

    *numberOfFiles = count;
}

int rippleCycleCheckUpwards(Object *parent, __ino_t inode) {
    if (parent)
        if (parent->info.st_ino == inode)
            return 1;
        else 
            return rippleCycleCheckUpwards((Object*) parent->children[0], inode);
    else 
        return 0;
}

void freeNode(Object *node) {
    node->children[0] = NULL;
    for (int i = 1; i < node->numberOfChildren + 1; i++)
        freeNode((Object*) node->children[i]);
    free(node->children[0]);
    free(node->children);
    free(node);
    node = NULL;
}

Object * createNode(const char *filename, Object *parent) {
    Object *node = malloc(sizeof(Object));

    char *files[THEORETICAL_MAX_PATH_LENGTH];
    char *realDestination = malloc(THEORETICAL_MAX_PATH_LENGTH);
    char *res = realpath(filename, realDestination);

    strncpy(node->path[0], filename, THEORETICAL_MAX_PATH_LENGTH - 1);
    if (res) {
        strncpy(node->path[1], realDestination, THEORETICAL_MAX_PATH_LENGTH - 1);
        strncat(node->path[1], "\0", 1);
    }
    free(realDestination);
    
    struct stat *info = malloc(sizeof(struct stat));
    lstat(filename, info);
    if (S_ISLNK(info->st_mode) == 1) {
        getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
    } else {
        getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
    }
    free(info);
    
    node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
    node->children[0] = (struct Object*) parent;

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    }

    for (int i = 0; i < node->numberOfChildren; i++) {
        char nextfile[THEORETICAL_MAX_PATH_LENGTH];
        snprintf(nextfile, THEORETICAL_MAX_PATH_LENGTH - 1, "%s/%s", filename, files[i]);
        
        if (S_ISDIR(node->info.st_mode) || S_ISLNK(node->info.st_mode)) {
            node->children[i + 1] = (struct Object*) createNode(nextfile, node);
        }
        free(files[i]);
    }
    return node;
}

You can build with this Makefile or gcc:

CC=gcc
CCFLAGS=-Wall -g
LDFLAGS=
SOURCES=$(wildcard *.c)
OBJECTS=$(SOURCES:.c=.o)
TARGET=ultra_cp

all: $(TARGET)

$(TARGET): $(OBJECTS)
        $(CC) -o $@ $^ $(LDFLAGS) 

%.o: %.c %.h
        $(CC) $(CCFLAGS) -c $<

%.o: %.c
        $(CC) $(CCFLAGS) -c $<

clean:
        rm -f *.o $(TARGET)

Then test with make then ./ultra_cp var for standard use or

valgrind --tool=memcheck --leak-check=yes --track-origins=yes --read-var-info=yes ./ultra_cp var

My issue is that valgrind prints out:

HEAP SUMMARY:
==3221==     in use at exit: 8,192 bytes in 2 blocks
==3221==   total heap usage: 112 allocs, 110 frees, 670,440 bytes allocated
==3221==
==3221== 8,192 bytes in 2 blocks are definitely lost in loss record 1 of 1
==3221==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==3221==    by 0x108CA1: getChildren (helpers.c:29)
==3221==    by 0x108F86: createNode (helpers.c:80)
==3221==    by 0x1090F8: createNode (helpers.c:98)
==3221==    by 0x1090F8: createNode (helpers.c:98)
==3221==    by 0x1091E3: main (main.c:15)
==3221==
==3221== LEAK SUMMARY:
==3221==    definitely lost: 8,192 bytes in 2 blocks
==3221==    indirectly lost: 0 bytes in 0 blocks
==3221==      possibly lost: 0 bytes in 0 blocks
==3221==    still reachable: 0 bytes in 0 blocks
==3221==         suppressed: 0 bytes in 0 blocks

helpers.c:29 is the line where files[count] = (char*) malloc(THEORETICAL_MAX_PATH_LENGTH); is called and the number of cases where it does not enter within the if statement.

It does not hinder the functioning of my code, but I'd like to avoid leaking memory if at all possible and would subsequently thank you for any help on this matter.

EDIT

Thanks to user dbush who provided the source of the problem, being that in the very specific case where rippleCycleCheckUpwards(...) is called, I do not empty the children of files.

In helpers.c one can replace

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        node->numberOfChildren = 0;
    }

with

    if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
        for (int i = 0; i < node->numberOfChildren; i++) {
            free(files[i]);
        }
        node->numberOfChildren = 0;
    }
Community
  • 1
  • 1
AtomicMaya
  • 101
  • 10
  • Comments are not for extended discussion; this conversation has been [moved to chat](https://chat.stackoverflow.com/rooms/201745/discussion-on-question-by-atomicnicos-malloc-within-if-statement-assigned-onl). – Samuel Liew Nov 01 '19 at 23:02

2 Answers2

2

If you conditionally allocate memory, you've got to free it, as well. Intuitively, one might assume they need to check if the memory is allocated before trying to free it, but luckily, the C standard has us covered.

You can shortcut your way around some code by setting the file pointer to NULL outside the loop (or in an else statement), and then calling free() later in the code, once you know you won't be using the memory anymore. You don't need to check if the pointer has been allocated, since free(NULL) is a perfectly legal no-op.

Nick Reed
  • 4,989
  • 4
  • 17
  • 37
0

The problem is here in createNode:

struct stat *info = malloc(sizeof(struct stat));
lstat(filename, info);
if (S_ISLNK(info->st_mode) == 1) {
    getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
} else {
    getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
}
free(info);

node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
node->children[0] = (struct Object*) parent;

if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
    node->numberOfChildren = 0;
}

You first get the list of children which are stored in files. You then do the cycle check, and if you find a cycle you set node->numberOfChildren to 0. This prevents you from entering the loop that follows where the entries of files are freed. So if you have a cycle you have a memory leak.

You can fix this by doing the cycle check first, then decending into the children if you don't find a cycle:

if (rippleCycleCheckUpwards((Object*) parent, node->info.st_ino) == 1) {
    node->numberOfChildren = 0;
} else {
    struct stat *info = malloc(sizeof(struct stat));
    lstat(filename, info);
    if (S_ISLNK(info->st_mode) == 1) {
        getChildren(node->path[1], &node->info, files, &node->numberOfChildren);
    } else {
        getChildren(node->path[0], &node->info, files, &node->numberOfChildren);
    }
    free(info);
}

node->children = malloc((1 + node->numberOfChildren) * sizeof(Object*));
node->children[0] = (struct Object*) parent;

Besides this, valgrind also shows several errors on reading uninitialized values, but I'll leave fixing those as an exercise to the reader.

dbush
  • 205,898
  • 23
  • 218
  • 273
  • This works with a few amends except for `node->info.st_ino` being initialized within `getChildren(...)` (needs to be declared initialized pre if, and remove the initialisation within `getChildren(...)` and pass the status of this initialisation down in the function call). – AtomicMaya Nov 01 '19 at 15:23
  • I'll accept this as this answer gives the source of the problem. A less complicated work around using the same logic will be added to the main question. Thanks again – AtomicMaya Nov 01 '19 at 23:40