-2

I've been learning about dynamic memory allocation in C and practicing it in code. While I think I've allocated the memory properly, I could be wrong about that. So any useful comments about that are fine (but please explain any suggestions so I can understand).

I've written a C program to first get a list of files in the current working directory, and write to each one. But the loop is seeing NULL before it's finished getting though all of the files. It was segfaulting before that, but I managed to fix that. Where it was not pointing to the correct part of memory, which was causing functions like fopen() to not get any filenames.

But while I can print out a list of files, the loop in writeToFiles() is just not completing the list of filenames that I've dynamically allocated whenever I use pretty much any of f* (file related) C library functions.

I've managed to fix all compilers errors and warning messages I was getting, but I really have no idea what's going on with it now. What I think is happening is that the pointer pointing to the struct member called filename isn't at the start of the pointer array. I'm pretty sure my buildFileList() function is fine and it IS working correctly. So do I need to "rewind" the pointer or something ?

If I could politely ask those that wish to respond to be as clear and complete as possible in any answers given. As I'm clearly missing something here, but I have no idea what.

WARNING: The following code if run will OVERWRITE the contents of the current working directory.

I've posted my code below:

___dyn_char_mem_alloc.c

char *___dyn_char_mem_alloc(size_t buffSize) {    
    
    // allocate char memory with malloc() for char *        
   
    char *ptr = malloc(buffSize+1);
   
    //printf("buffSize (char): %d\n", buffSize);
    
    // if malloc returns a pointer that is not NULL, we return it to the caller
    if (ptr != NULL) {
        // puts("malloc() pointer returned (char *)\n");
        // printf("ptr in ___dyn_char_mem_alloc() function = %p\n");
        return ptr;
    } else {       
        // puts("malloc() call failed!\n");
        return NULL; 
   }
}

___dyn_char_mem_alloc.h

#include <stdio.h>
#include <stdlib.h> // for malloc()

char *___dyn_char_mem_alloc(size_t buffSize);

___dyn_struct_mem_alloc.c

#include "___dyn_struct_mem_alloc.h"

struct filepath *___dyn_struct_mem_alloc(size_t buffSize) {
        
    // allocate char memory with malloc() for struct
    
    struct filepath *t_filepath = malloc(buffSize);   
    
    // if malloc returns a pointer that is not NULL, we return it to the caller
    if (t_filepath != NULL) {      
        return t_filepath;
    } else {       
        // puts("malloc() call failed!\n");
        return NULL;
    }
}

___dyn_struct_mem_alloc.h

#include <stdio.h>
#include <stdlib.h> // for malloc()
#include <linux/limits.h>

//struct filepath *___dyn_struct_mem_alloc(size_t buffSize);

struct filepath {
    char *path[PATH_MAX];
    char *filename[FILENAME_MAX];
};

build_file_list.c

struct filepath *buildFileList(void) {
    
    DIR *d = opendir(".");
    struct dirent *dir;
    struct stat stats;
    //struct filepath *t_filepath = malloc(sizeof(struct filepath));
    struct filepath *t_filepath = ___dyn_struct_mem_alloc(sizeof(struct filepath)); 
    int i = 0, strLength = 0, totalNumFiles = 0;        
    const char *pDirFsPtr = "..", *curWorkDirFsPtr = ".";        
    
    while (((dir = readdir(d)) != NULL)) {
        
        if ( (!strcmp(dir->d_name, pDirFsPtr)) || (!strcmp(dir->d_name, curWorkDirFsPtr)) )  {              
            continue;
        } else {              
            // check if it's a directory and if so skip it              
            stat(dir->d_name, &stats);
            if (!S_ISDIR(stats.st_mode)) {
                // get length of filename
                strLength = strlen(dir->d_name);
                  
                // allocate some memory for the filename               
                //t_filepath->filename[i] = malloc(sizeof(char) * (strLength+1));
                t_filepath->filename[i] = ___dyn_char_mem_alloc(strLength+1);                
                  
                // let's copy each filename from the dirent structure's d_name member to our newly dynamically allocated memory                 
                //  t_filepath->filename[i]=(dir->d_name); // doesn't matter if i use strcpy() below instead or not - same problem
                strcpy(t_filepath->filename[i], dir->d_name);                  
                //  printf("filepath->filename[%d] name: %s | ptr: %p \n",i, t_filepath->filename[i], t_filepath->filename[i]); // works no problem
                  
                totalNumFiles++;                  
            }                                         
        }
        ++i;
    }    
    closedir(d);
    
    printf("t_filepath struct ptr from buildFileList(): %p\n",  t_filepath);
    printf("t_filepath->filename struct ptr from buildFileList(): %p\n",  t_filepath->filename);
        
    #ifdef DEBUG    
    printf("\nTotal number of files in dir: %d\n\n", totalNumFiles);
    #endif
    
    #ifdef DEBUG
    int k = 0;
    while (t_filepath->filename[k] != NULL) {
        printf("t_filepath->filename[%d] name: %s | ptr addr: %p \n",k, t_filepath->filename[k], t_filepath->filename[k]);
        ++k;
    }
    #endif
    
    return t_filepath;
}

build_file_list.h

#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>
#include <unistd.h>
#include <libgen.h>
#include <linux/limits.h>

struct filepath *buildFileList(void);
char *___dyn_char_mem_alloc(size_t buffSize);
struct filepath *___dyn_struct_mem_alloc(size_t buffSize);

struct filepath {
    char *path[PATH_MAX];
    char *filename[FILENAME_MAX];
};

main.c

#include "main.h"

int main(int argc, char *argv[]) {
        
    //TODO: add a check for command-line args    
       
    someOtherFunction();
        
    return 0;
}

main.h

#include <stdio.h>

int someOtherFunction(void);

some_other_function.c

#include "some_other_function.h"

int someOtherFunction(void) {
    
    struct filepath *t_filenames = NULL;
    int i = 0;
    
    t_filenames = buildFileList();
    
    puts("\n");
    printf("t_filenames struct ptr from someOtherFunction(): %p\n",  t_filenames);
    printf("t_filenames->filename struct ptr from someOtherFunction(): %p\n",  t_filenames->filename);
    puts("\n");
    
    /* while (t_filenames->filename[i] != NULL) {
        printf("t_filenames->filename[%d] name (from someOtherFunction()): %s | ptr addr: %p \n",i, t_filenames->filename[i], t_filenames->filename[i]);
        ++i;
    } */ // works
    
    writeToFiles(t_filenames);
    
    return 0;
}

some_other_function.h

#include <stdio.h>
#include <linux/limits.h>

int someOtherFunction(void);
struct filepath *buildFileList(void);
void writeToFiles(struct filepath *t_filenames);

struct filepath {
    char *path[PATH_MAX];
    char *filename[FILENAME_MAX];
};

write_to_files.c

#include "write_to_files.h"

void writeToFiles(struct filepath *t_filenames) {
    
    FILE *curFile;
    int i = 0;  
    char msg[100] = "giggle, giggle, giggle....";  
    
    puts("\n");
    printf("t_filenames struct ptr from writeToFiles(): %p\n",  t_filenames);
    printf("t_filenames->filename struct ptr from writeToFiles(): %p\n",  t_filenames->filename);
    puts("\n");
    
    while (t_filenames->filename[i] != NULL) { 
        printf("filename name (from writeToFiles()): %s ptr addr: %p\n", t_filenames->filename[i], t_filenames->filename[i]);
        curFile = fopen(t_filenames->filename[i], "wb"); // works, but calling anymore functions were causing segfaults
        if (curFile != NULL) {
            puts("fprintf(curFile, %s, msg);");
            printf("current filename name: %s\n", t_filenames->filename[i]);
            // print childish message to file :-P
            fprintf(curFile, "%s", msg);
            puts("fclose(curFile);"); 
            fclose(curFile);
        } else {
            puts("loop broken!\n"); // always gets here way too early
            break;
        }         
        ++i;        
    }
    
    return;
}

write_to_files.h

#include <stdio.h>
#include <string.h>
#include <stdlib.h> // needed for exit() call -- remove?
#include <linux/limits.h>

struct filepath {
    char *path[PATH_MAX];
    char *filename[FILENAME_MAX];
};

Thanks for any help with this!

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Try running your code through valgrind. If you're mismanaging memory it will tell you where. – dbush Sep 01 '23 at 14:02
  • 8
    Can you please cut this down to a [mcve], emphasis on the **minimal**? – Nate Eldredge Sep 01 '23 at 14:03
  • Too many code snippets that are hard to get any connection between. It should not be too hard to create a [mre] to show us. – Some programmer dude Sep 01 '23 at 14:05
  • I've already tried valgrind, but it doesn't seem to want to work on my distro – anotherpoorbastard Sep 01 '23 at 14:06
  • I wanted to post all of my code so you guys can look at it to hopefully spot where I'm going wrong. I would have thought that you would need to see all of my code to figure out where I'm going wrong. I'm not sure how to cut it down any more than I already have before posting to still have it reproduce the problem. – anotherpoorbastard Sep 01 '23 at 14:07
  • 7
    So first, put it all into a single source file, so that a person can test it by copy-pasting one block of text instead of twelve. Second, go through the code. Anything that doesn't seem directly related to the bug, remove it or stub it out, and see if the bug still happens. Be patient; this is not necessarily going to be a quick process, but it's necessary. – Nate Eldredge Sep 01 '23 at 14:15
  • But that's the thing, if I put it all into one source file, I'm not sure it's going to reproduce the problem - which brings me no closer to understand what I'm doing wrong. – anotherpoorbastard Sep 01 '23 at 14:17
  • 1
    The other thing is that, since your program walks a directory tree, then someone trying to reproduce it needs to have a directory tree that causes the problem. So you need to show us what that tree looks like. The output of `find -ls` would be a good start; even better would be a script to create it. – Nate Eldredge Sep 01 '23 at 14:17
  • 2
    @anotherpoorbastard: Right, so you **test** it. Put it all in one source file and **compile and run it**; then you will know if it still reproduces the problem. Odds are that it will. If it doesn't, then you can start investigating why not, and will probably learn something from that as well. – Nate Eldredge Sep 01 '23 at 14:19
  • Build with compiler sanitizers? Add e.g. `-fsanitize=address` when building and linking. And of course with debug information (the `-g` flag). – Some programmer dude Sep 01 '23 at 14:20
  • Just to keep in mind: the aim of this site isn't to just fix your bug, per se; it's to help you understand the underlying problem, and perhaps some general approaches for solving it. That's easier to do in the context of a simplified example. Once you understand the problem, it will then be up to you to go back to your actual code base, look at the issue in context, decide how best to address it, and implement the fix. – Nate Eldredge Sep 01 '23 at 14:47
  • Unrelated, but `___dyn_struct_mem_alloc` isn't a good choice for a function name. In C, all identifiers starting with two underscores are reserved. – Nate Eldredge Sep 01 '23 at 14:50
  • And additionally, it would be good to add a big warning that this code overwrites all files in the current directory. It'd be unfortunate if some unsuspecting SO user decided to test your code without first reading it thoroughly, and happened to do so in a directory that contained important files. – Nate Eldredge Sep 01 '23 at 14:52
  • Done Nate, warning added - good idea – anotherpoorbastard Sep 01 '23 at 14:58
  • 1
    Note that you should not, in general, create function, variable, tag or macro names that start with an underscore. Part of [C11 §7.1.3 Reserved identifiers](https://port70.net/~nsz/c/c11/n1570.html#7.1.3) says: — _All identifiers that begin with an underscore and either an uppercase letter or another underscore are always reserved for any use._ — _All identifiers that begin with an underscore are always reserved for use as identifiers with file scope in both the ordinary and tag name spaces._ See also [What does double underscore (`__const`) mean in C?](https://stackoverflow.com/q/1449181) – Jonathan Leffler Sep 01 '23 at 15:19

1 Answers1

2

Several problems are immediately apparent:

  1. You're not initializing variables but relying on their value anyway
  2. You're not ensuring the files added to your list actually fit in the memory you allocated
  3. You're not ensuring that once you do create a list, there's a guaranteed way to know when to stop iterating over it when you read it.

This is not a complete list of all possible problems with your code. There may very well be other issues.

Your ___dyn_struct_mem_alloc() function fails to initialize the contents of the structure:

   struct filepath * ___dyn_struct_mem_alloc(size_t buffSize) {
            
       // allocate char memory with malloc() for struct
        
        struct filepath *t_filepath = malloc(buffSize);   
        
       // if malloc returns a pointer that is not NULL, we return it to the caller
       if (t_filepath != NULL) {      
           return t_filepath;
       }
       else {       
           // puts("malloc() call failed!\n");
            return NULL;
       }
    }

Yet this code:

while(t_filenames->filename[i] != NULL) { 
    .
    .
    .       
}

assumes unset values are NULL.

You also never ensure you don't try to add more than FILENAME_MAX files in any struct filepath. This look will keep adding filenames to the field no matter how many there are:

while (((dir = readdir(d)) != NULL)) {
      if ( (!strcmp(dir->d_name, pDirFsPtr)) || (!strcmp(dir->d_name, curWorkDirFsPtr)) )  {              
          continue;
      }          
      else {              
          // check if it's a directory and if so skip it              
          stat(dir->d_name, &stats);
          if (!S_ISDIR(stats.st_mode)) {
              // get length of filename
              strLength = strlen(dir->d_name);
              
              // allocate some memory for the filename               
              //t_filepath->filename[i] = malloc(sizeof(char) * (strLength+1));
              t_filepath->filename[i] = ___dyn_char_mem_alloc(strLength+1);                
              
              // let's copy each filename from the dirent structure's d_name member to our newly dynamically allocated memory                 
            //  t_filepath->filename[i]=(dir->d_name); // doesn't matter if i use strcpy() below instead or not - same problem
              strcpy(t_filepath->filename[i], dir->d_name);                  
            //  printf("filepath->filename[%d] name: %s | ptr: %p \n",i, t_filepath->filename[i], t_filepath->filename[i]); // works no problem
              
              totalNumFiles++;                  
          }                                         
      }
      ++i;
}    
closedir(d);

If there are more than FILENAME_MAX files in any directory, you will corrupt memory as your code overflows the buffer.

And if there are exactly FILENAME_MAX files in the directory, your code will have no way of knowing when it hits the end of the list because there can't be a NULL sentinal value even if you fix your code and initialize the filename array in the structure to all NULL pointer values.

Also, names such as ___dyn_char_mem_alloc are reserved identifiers:

All identifiers that begin with an underscore and either an uppercase letter or another underscore are always reserved for any use.

And since you appear to be using Linux, just delete ___dyn_char_mem_alloc() and the manual copy of the string you're doing with a single call to strdup(). Removing extraneous code such as that will make the remaining code easier to follow and understand.

Andrew Henle
  • 32,625
  • 3
  • 24
  • 56
  • Thank you Andrew, this is helpful. But could you expand on the points you've made, as while I understand what you're saying, I'm not really sure where exactly you mean when you say "not initializing variables but relying on their value anyway". Could you show actual examples of that? I'm also not sure what you mean in your last comment about strdup(), could you expand on that? – anotherpoorbastard Sep 01 '23 at 14:54
  • The memory malloc() returns is not set to all 0 bytes, it is in fact a a collection of what was in that memory since boot or last use by potentially some other program, which is essentially random. calloc() on the other hand sets all the bytes in the memory allocates to 0. – JohnH Sep 01 '23 at 17:33