2

Understanding handling direct pointers in C

Here is a code that works for an array of strings for fixed number of items and fixed line length :

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAXNAMELEN 100
#define MAXLINELEN 100
#define MAXITEMS 1000

int main(int argc, char ** argv) {

 FILE * infile, * outfile;
 char name[MAXNAMELEN];
 char line[MAXLINELEN];
 char lines[MAXITEMS][MAXLINELEN];
 int i, items = 0;

 printf("Enter a source filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 infile = fopen(name, "r");
 while (fgets(line, sizeof(line), infile)) {
        strcpy(lines[items], line);
        items++;
 }

 qsort(lines, items, MAXLINELEN, strcmp);

 printf("Enter a destination filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 outfile = fopen(name, "w");
 for (i=0; i<items; i++) {
    fputs(lines[i], outfile);
 }

 fclose(infile);
 fclose(outfile);
}

Problem description and code

If I try to read an input.txt file that is within MAXLINELEN and MAXITEMS, the program works fine. Now imagine I am reading from a much larger "inputfile" line by line where maximum line length could be anything, then I would have to use a character pointer (char*) to read the input. char* linesptr[MAXITEMS];

Here is my code where I am trying to accomplish reading from an input file line by line delimited by a newline character.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#define MAXNAMELEN 1000
#define MAXLINELEN 1000
#define MAXITEMS 100000

char* linesptr[MAXITEMS];

int
main(int argc, char ** argv) {

 FILE * infile, * outfile;
 char name[MAXNAMELEN];
 char line[MAXLINELEN];

 int i, items = 0;

 printf("Enter a source filename: ");
 fgets(name, MAXNAMELEN, stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 printf("%s infile \n",name);
 infile = fopen(name, "r");
 while (fgets(line, MAXLINELEN, infile)) {
    int length = strlen(line);
    line[length-1] = '\0';
    linesptr[items] = line; *<- I am writing to the same mem location*
    printf("the input string %d is : %s \n",items,  linesptr[items]);
        items++;
 }

 qsort(linesptr, items, MAXLINELEN, strcmp); 
 printf("Enter a destination filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 outfile = fopen(name, "w");
 for (i=0; i<items; i++) {
    fputs(linesptr[i], outfile);
 }

 fclose(infile);
 fclose(outfile);
}

PROBLEM

I am copying the pointer address into the nth cell of the array linesptr where nth is the value=items (Here is the reference line from the code: linesptr[items] = line;). so when you print the final answer, I am referencing the same memory address to the buffer named line, the memory location at line will always point to the most recent fgets(). I understand the error but I do not know how to fix the issue. I would appreciate any help to fix the bug in the code.

Barmar
  • 741,623
  • 53
  • 500
  • 612
Akshaya
  • 53
  • 5

3 Answers3

3

Copy the line to a dynamically-allocated string.

while (fgets(line, MAXLINELEN, infile)) {
    int length = strlen(line);
    if (length > 0 && line[length-1] == '\n') {
        line[length-1] = '\0';
        length--;
    }
    char *linecopy = malloc(length+1);
    strcpy(linecpy, line);
    linesptr[items] = linecpy;
    printf("the input string %d is : %s \n",items,  linesptr[items]);
    items++;
}

And if you want to handle more than MAXITEMS lines, you should allocate linesptr using malloc() as well. When you get to the current size of linesptr you can use realloc() to make it longer. See Read unknown number of lines from stdin, C for detailed code.

See How to qsort an array of pointers to char in C? for the proper way to sort an array of pointers to strings.

Barmar
  • 741,623
  • 53
  • 500
  • 612
  • Thank you for sharing this. :) It worked.. Now I have the input in "linesptr" variable. When I pass this to the built-in qsort() method, it throws segmentation fault. How do I overcome this? – Akshaya Jun 23 '18 at 02:19
  • ---- I fixed ^^.. Basically, I had to change the following line in the code from "qsort(linesptr, items, MAXLINELEN, strcmp); " to "qsort(linesptr, items, sizeof(char *), strcmp); " – Akshaya Jun 23 '18 at 02:27
  • That's what I said in my answer. – Barmar Jun 23 '18 at 02:28
  • Thank you, Barmar. I have another follow up question. The qsort() method doesn't sort my input. I am not sure what I am doing wrong. I would appreciate help with this too. Basically, I am reading from an input file that has the following text in it: how are you? hello there somewhere over the rainbow This is how my output looks like: how are you?hello theresomewhere over therainbow – Akshaya Jun 23 '18 at 02:31
  • `strcmp()` is not the correct comparison function, because it doesn't dereference the pointer. I've added a link to another SO question that shows how to do it correctly. – Barmar Jun 23 '18 at 02:41
  • Oh yes, strcmp() works for multidimensional arrays not pointers. Thanks so much for all the help! :) My code works with the new changes. – Akshaya Jun 23 '18 at 03:09
  • Pedantic point: `line[length-1] = '\0';` begins a hacker exploit of _undefined behavior_ when the first character read in a line is a _null character_. Recommend `if (length > 0 && line[length] == '\n') line[--length] = 0; linecopy = malloc(length+1);` – chux - Reinstate Monica Jun 25 '18 at 15:48
  • @chux How can `fgets()` ever return an empty string? If there's nothing to return you must be at EOF, so it returns `NULL` and the loop exits. – Barmar Jun 25 '18 at 15:53
  • "the first character read in a line is a null character" results in `line[0]==0`. Say input was `'\0'`, `'a'`, `'b'`, `'c'`, `'\n'`. `fgets()` returns non-`NULL`. – chux - Reinstate Monica Jun 25 '18 at 15:54
  • Also by testing `if (length > 0 && line[length-1] == '\n')` (corrected from above [comment](https://stackoverflow.com/questions/50997273/how-can-i-navigate-through-an-array-of-strings-of-any-length-in-c/50997315?noredirect=1#comment89047110_50997315)), code will not lop off the last character in the last "line" from a file that does not end with a `'\n'`. – chux - Reinstate Monica Jun 25 '18 at 15:59
  • Thanks. I got that right in another question I answered a few days ago, somehow didn't think of it this time. – Barmar Jun 25 '18 at 16:02
1

You ask for a example, so here it is:

the following proposed code:

  1. is written for readability
  2. checks for and handles error conditions
  3. makes use of getline() and realloc()
  4. is not as efficient as it could be because it calls realloc() for every line in the source file.
  5. properly/safely uses strcspn() for removing any (possible) trailing newline characters
  6. could have simplified the code by extracting the 'cleanup' to a sub function rather than repeating the same 'cleanup' code when ever an error was encountered.
  7. used size_t rather than int for indexes into arrays to avoid implicit conversions
  8. minimized the scope of variables where possible
  9. passes proper third parameter to qsort()
  10. properly implements the compare() helper function for qsort()

and now, the proposed code:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>


#define MAXNAMELEN 1024

// prototypes
int compare(const void *, const void *);


int main( void )
{
    printf("Enter a source filename: ");
    char name[ MAXNAMELEN ];
    if( !fgets(name, sizeof( name ), stdin) )
    {
        perror( "fgets for input file name failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, fgets for input file name successful

    name[strcspn( name, "\n" ) ] = '\0'; // strip newline
    printf("%s infile \n",name);

    FILE *fp_in = fopen(name, "r");
    if( !fp_in )
    {
        perror( "fopen for input file failed" );
        exit( EXIT_FAILURE );
    }

    // implied else, fopen for input file successful

    char **linesarray = NULL;
    size_t numLines   = 0;

    char   *line      = NULL;
    size_t  lineLen   = 0;

    while( getline( &line, &lineLen, fp_in ) != -1 )
    {
        char ** temp = realloc( linesarray, (numLines+1) * sizeof( char* ) );
        if( !temp )
        {
            perror( "realloc failed" );
            fclose( fp_in );
            for( size_t i = 0; i< numLines; i++ )
            {
                free( linesarray[i]);
            }
            free( linesarray );
            exit( EXIT_FAILURE );
        }

        // implied else, realloc successful

        linesarray = temp;
        linesarray[ numLines ] = line;
        numLines++;

        // prep for next iteration
        line = NULL;
        lineLen = 0;
    }

    free( line );
    fclose( fp_in );

    //puts( "all file read in" );

    qsort( linesarray, numLines, sizeof( char * ), compare );

    //puts( "file sorted" );

    printf("Enter a destination filename: ");

    if( !fgets(name, sizeof(name), stdin) )
    {
        perror( "fgets for output file name failed" );

        for( size_t i = 0; i< numLines; i++ )
        {
            free( linesarray[i]);
        }
        free( linesarray );
        exit( EXIT_FAILURE );
    }

    // implied else, fgets() for output file name successful

    name[strcspn( name, "\n" ) ] = '\0'; // strip newline

    FILE *fp_out = fopen(name, "w");
    if( !fp_out )
    {
        perror( "fopen for output file failed" );

        for( size_t i = 0; i< numLines; i++ )
        {
            free( linesarray[i]);
        }
        free( linesarray );
        exit( EXIT_FAILURE );
    }

    // implied else, fopen for output file successful

    for (size_t i=0; i<numLines; i++)
    {
        if( fputs(linesarray[i], fp_out ) == EOF )
        {
            perror( "fputs failed" );
            fclose( fp_out );

            for( size_t i = 0; i< numLines; i++ )
            {
                free( linesarray[i]);
            }
            free( linesarray );
            exit( EXIT_FAILURE );
        }
    }

    fclose( fp_out );

    for( size_t i = 0; i< numLines; i++ )
    {
        free( linesarray[i]);
    }
    free( linesarray );
}


int compare(const void *ls, const void *rs )
{
    char *leftSide  = *(char**)ls;
    char *rightSide = *(char**)rs;
    return strcmp( leftSide, rightSide );
}
user3629249
  • 16,402
  • 1
  • 16
  • 17
  • Sneaky use of `line = NULL; lineLen = 0;` to get `getiline()` to do the `strdup()`. I'd expect a `free(line)` after the while loop to not leak memory. [This buffer should be freed by the user program even if getline() failed.](http://man7.org/linux/man-pages/man3/getline.3.html) – chux - Reinstate Monica Jun 25 '18 at 16:07
  • note: the pointer returned by `getline()` in the variable `line` is being saved into an entry in the array `linesarray[]` so cannot pass to `free()` until done with that array – user3629249 Jun 25 '18 at 17:19
  • "pointer returned by getline() in the variable line is being saved into an entry in the array " --> except for the last call of `getline()`. So if the loop ends because `getline()` returned `NULL` or `if( !temp )` path, `line` is not free'd and thus a leak. – chux - Reinstate Monica Jun 25 '18 at 19:25
  • @chux, `getline()`, at end of file, returns -1, not NULL. However, I do agree, that I should have passed the final entry in `line` to `free()`. I'll edit the posted code. – user3629249 Jun 26 '18 at 01:12
0

Here is the complete working solution to read in a file (big data), sort it and write it to a file:

#include <stdio.h>
#include <string.h> 
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
#define MAXNAMELEN 1000
#define MAXLINELEN 5000
#define MAXITEMS 100000

char* linesptr[MAXITEMS];

int compare_function(const void *name1, const void *name2)
{
  const char *name1_ = *(const char **)name1;
  const char *name2_ = *(const char **)name2;
  return strcmp(name1_, name2_);
}

int
main(int argc, char ** argv) 
{

 FILE * infile, * outfile;
 char name[MAXNAMELEN];
 char line[MAXLINELEN];

 int i, items = 0;

 printf("Enter a source filename: ");
 fgets(name, MAXNAMELEN, stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 infile = fopen(name, "r");
 while (fgets(line, MAXLINELEN, infile)) {
    int length = strlen(line);
    line[length-1] = '\0';
    char *linecopy = malloc(length);
    strcpy(linecopy, line);
    linesptr[items] = linecopy;
    items++;
 }

 qsort(linesptr, items, sizeof(char *), compare_function);

 printf("Enter a destination filename: ");
 fgets(name, sizeof(name), stdin);
 name[strlen(name)-1] = '\0'; // strip newline
 outfile = fopen(name, "w");
 for (i=0; i<items; i++) {
    fprintf(outfile, "%s\n", linesptr[i]);
 }
 fclose(infile);
 fclose(outfile);
}
Akshaya
  • 53
  • 5