1

I'm trying to create a program that reads a file (from *argv[]) into memory, then outputs to terminal.

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char const *argv[])
{
    FILE *fp;
    fp = fopen(argv[1], "r");
    unsigned long size;

    fseek(fp, 0, SEEK_END);
    size = ftell(fp);
    rewind(fp);

    //printf("\n");

    int *dict = malloc(size);
    unsigned long i;
    for (i = 0; i <= size; ++i) 
        *(dict + i) = getc(fp);

    for (i = 0; i <= size - 1; ++i)
        printf("%c", *(dict + i));

    free(dict);
    return 0;
}

After compiling and running the program, it returns malloc(): corrupted top size. Uncommenting printf("\n"); will allow the program to work as intended.

chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • 1
    You're going [one byte] _beyond_ the end of the allocated area (i.e. UB -- undefined behavior). You're overwriting/corrupting the internal heap structures. Change your `for` loops (e.g.): `for (i = 0; i < size; ++i)` – Craig Estey Jun 19 '22 at 19:26
  • And, `dict` is `int *`. You want: `char *dict = malloc(size);` Otherwise, the indexing goes 4x too far (because `sizeof(int) == 4` [on most architectures]). – Craig Estey Jun 19 '22 at 19:28
  • 1
    Who taught you to use `ftell()` to get the size of a file? It doesn't work in general. Per [**7.21.9.4 The ftell function**](https://www.port70.net/~nsz/c/c11/n1570.html#7.21.9.4p2) of the C11 standard: "... For a text stream, its file position indicator contains unspecified information, usable by the fseek function for returning the file position indicator for the stream to its position at the time of the ftell call; **the difference between two such return values is not necessarily a meaningful measure of the number of characters written or read.**" On Windows you will get the wrong size – Andrew Henle Jun 19 '22 at 19:54
  • 1
    @AndrewHenle: The link you posted has SSL certificate issues, which causes my browser to provide a warning message. There are two ways to fix this issue: 1. Remove the `www.` from the URL, or 2. change `https://` to `http://`. – Andreas Wenzel Jun 19 '22 at 20:07
  • @AndreasWenzel Thanks you. HTTP link: http://www.port70.net/~nsz/c/c11/n1570.html#7.21.9.4p2 – Andrew Henle Jun 19 '22 at 20:21
  • 1
    Side note: Instead of writing `*(dict + i)`, it is more common to write the equivalent expression `dict[i]`. This also makes your program easier to read. – Andreas Wenzel Jun 19 '22 at 20:33
  • Not only is using `ftell` to get the file size unreliable, it is just a bad idea. If you want to read data until all the data is gone, then do that. Don't try to pre-judge how much data will be given; just read until you cannot read any more. – William Pursell Jun 19 '22 at 20:41
  • 1
    Why read it entirely into memory at all? Just copy a bufferload at a time. This will get rid of most of your problems, as well as the ones you haven't anticipated, such as that the file won't fit, is larger than 2GB, etc. – user207421 Jun 20 '22 at 00:16

3 Answers3

1

There are multiple problems in your code:

  • you open the file in text mode, hence the value returned by ftell() is not guaranteed to be the length of the file.
  • you do not test for fopen failure.
  • you allocate an array of int with a byte size of size. This array is way too short to store size bytes, one per entry. You should instead allocate an array of char, possibly with a length of size + 1 to allow for a null terminator.
  • you do not test for allocation failure.
  • the loop for (i = 0; i <= size; ++i) iterates once too far. You read size + 1 bytes from the file. If the array had been allocated as char *dict = malloc(size); you would still have undefined behavior.
  • the output loop writes one byte at a time in an inefficient way. You should just use fwrite.

Here is a modified version:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "missing argument\n");
        return 1;
    }
    FILE *fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        fprintf(stderr, "cannot open %s: %s\n", argv[1], strerror(errno));
        return 1;
    }

    long size;
    if (fseek(fp, 0, SEEK_END) < 0 || (size = ftell(fp)) < 0) {
        fprintf(stderr, "cannot determine file size %s: %s\n",
                argv[1], strerror(errno));
        // should read the file and reallocate the buffer as needed
        fclose(fp);
        return 1;
    }

    char *dict = malloc(size + 1);
    if (dict == NULL) {
        fprintf(stderr, "cannot allocate memory: %s\n", strerror(errno));
        fclose(fp);
        return 1;
    }
    rewind(fp);
    size_t nread = fread(dict, 1, size, fp);
    if (nread != size) {
        fprintf(stderr, "only read %zu/%zu bytes\n", nread, size);
    }
    dict[nread] = '\0';

    size_t nwritten = fwrite(dict, 1, nread, fp);
    if (nwritten != nread) {
        fprintf(stderr, "only wrote %zu/%zu bytes\n", nwritten, nread);
    }
    fclose(fp);
    free(dict);
    return 0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • According to [§7.21.9.2 ¶3 of the ISO C11 standard](http://port70.net/~nsz/c/c11/n1570.html#7.21.9.2p3), on a binary stream, it is not guaranteed that using `fseek` with `SEEK_END` is meaningfully supported. Therefore, as far as I can tell, the only way to determine the file size that is guaranteed to work is to read the entire file. However, I am still upvoting your answer, because I am unaware of any modern platform on which this actually is an issue. – Andreas Wenzel Jun 20 '22 at 00:43
  • @AndreasWenzel: You are correct. I amended the answer and tested the return value of `fseek()`. There are many cases where seeking will not provide meaningful information: if the file is a device, if the file is modified concurrently by another process, either extending or truncating it... writing binary content to `stdout` open in text mode might also pose problems on some platforms, such as duplication of CR bytes: the output redirected to a file will be different from the source file. – chqrlie Jun 20 '22 at 07:51
1

Accessing the memory buffer out of bounds

The line

*(dict + i) = getc(fp);

will access dict out of bounds, for the following reasons:

The variable dict is a pointer to a memory buffer of size bytes. The type of this pointer is int *. However, that does not mean that there is room for size elements of type int in the memory buffer. On most platforms, an int has a size of 4 bytes. Therefore, the memory buffer only has room for size / 4 elements of type int.

For example, if size is 100, then dict will be pointing to a memory buffer of 100 bytes, which can only store 25 int elements. In that case, valid indexes range from 0 to 24.

However, in your loop

for (i = 0; i <= size; ++i) 

your index is going from 0 to size inclusive, which, if we stay with our example of size being 100, would be from 0 to 100 including 100. However, as previously stated, in that example, valid indexes are from 0 to 24, because there is only room for 25 int elements. Since the highest index you are using is 100 instead of 24, you are accessing the array out of bounds.

In order to fix this, you should do two things:

You should change the line

int *dict = malloc(size);

to

char *dict = malloc(size);

so that, if we stay with the example of size being 100, you can now store 100 elements of type char in the memory buffer, instead of only 25 elements of type int.

You should also change the line

for (i = 0; i <= size; ++i) 

to

for (i = 0; i < size; ++i)

so that, if we stay with the example of size being 100, it uses the indexes 0 to 99 instead of 0 to 100, because 100 is not a valid index and would be out of bounds.

Determining the length of a file

With the lines

fseek(fp, 0, SEEK_END);
size = ftell(fp);

you seem to be attempting to determine the length of the file. However, in ISO C, there is no reliable way to determine the length of a file without reading the entire file, because

  1. when the stream is opened in text mode, the return value of the function ftell is unspecified and only meaningful as input to the function fseek (i.e. it does not necessarily specify the length of the file), and
  2. when the stream is opened in binary mode, a function call to fseek with the argument SEEK_END is not guaranteed to be meaningfully supported.

Therefore, even if those lines of code that you are using happen to work on your platform, they are not guaranteed to work on all platforms.

Since the only way to determine the file size that is guaranteed to work on all ISO C compliant platforms is to read the entire file, in the following section, I will provide such a solution, which reads the entire file once and grows the memory buffer as necessary.

Solution which is guaranteed to work on all platforms

The following solution does not rely on any platform-specific behavior and is guaranteed to work on all ISO C compliant platforms.

#include <stdio.h>
#include <stdlib.h>

#define INITIAL_BUFFER_CAPACITY 512

int main( int argc, char *argv[] )
{
    FILE *fp;
    char *data;
    size_t data_size = 0, data_capacity = INITIAL_BUFFER_CAPACITY;
    int c;

    //verify that argv[1] exists
    if ( argc < 2 )
    {
        fprintf( stderr, "This program requires an argument.\n" );
        exit( EXIT_FAILURE );
    }

    //attempt to open file
    fp = fopen( argv[1], "r" );
    if ( fp == NULL )
    {
        fprintf( stderr, "Error opening file!\n" );
        exit( EXIT_FAILURE );
    }

    //allocate memory for initial memory buffer
    data = malloc( data_capacity );
    if ( data == NULL )
    {
        fprintf( stderr, "Memory allocation error!\n" );
        exit( EXIT_FAILURE );
    }

    //read the entire file one character per loop iteration
    while ( ( c = getc( fp ) ) != EOF )
    {
        //grow the buffer, if necessary
        if ( data_size == data_capacity )
        {
            data_capacity *= 2;

            data = realloc( data, data_capacity );

            if ( data == NULL )
            {
                fprintf( stderr, "Memory allocation error!\n" );
                exit( EXIT_FAILURE );
            }
        }

        //write the character to the memory buffer and update the size
        data[data_size++] = c;
    }

    //print the entire file contents as text
    fwrite( data, data_size, 1, stdout );

    //cleanup
    free( data );
    fclose( fp );

    return EXIT_SUCCESS;
}

Here is a more efficient solution which uses fread to read as much as possible at once, instead of only one character at a time.

#include <stdio.h>
#include <stdlib.h>

#define INITIAL_BUFFER_CAPACITY 512

int main( int argc, char *argv[] )
{
    FILE *fp;
    char *data = NULL;
    size_t data_size = 0, data_capacity = INITIAL_BUFFER_CAPACITY;

    //verify that argv[1] exists
    if ( argc < 2 )
    {
        fprintf( stderr, "This program requires an argument.\n" );
        exit( EXIT_FAILURE );
    }

    //attempt to open file
    fp = fopen( argv[1], "r" );
    if ( fp == NULL )
    {
        fprintf( stderr, "Error opening file!\n" );
        exit( EXIT_FAILURE );
    }

    //fill the memory buffer in every loop iteration and expand it
    //for the next read
    for (;;) //infinite loop, equivalent to while(1)
    {
        size_t bytes_read, bytes_to_read;

        //grow the buffer to desired capacity
        data = realloc( data, data_capacity );
        if ( data == NULL )
        {
            fprintf( stderr, "Memory allocation error!\n" );
            exit( EXIT_FAILURE );
        }

        //calculate number of bytes to read in next read operation
        bytes_to_read = data_capacity - data_size;

        //attempt to fill the read buffer        
        bytes_read = fread( data + data_size, 1, bytes_to_read, fp );

        //update size of data
        data_size += bytes_read;

        //break out of the infinite loop if the read buffer could
        //not be filled entirely
        if ( bytes_read != bytes_to_read )
            break;
        
        //change desired capacity for next loop iteration
        data_capacity *= 2;
    }

    //print the entire file contents as text
    fwrite( data, data_size, 1, stdout );

    //cleanup
    free( data );
    fclose( fp );

    return EXIT_SUCCESS;
}
Andreas Wenzel
  • 22,760
  • 4
  • 24
  • 39
0

This will work: I changed the part of malloc, that was defined int before, (The int* exceeds the memory allocete) also, you read the dict with "%c" so that i converted it to char*.

Also added check for fseek() if returns error, we exit!

#include <stdio.h>
#include <stdlib.h>

int main(int argc, char const *argv[])
{
    unsigned long size;
    FILE* fp = fopen(argv[1], "rb");
    if (fp != NULL) {
        if( fseek(fp, 0, SEEK_END) ){ // sets the file position of the SEEK_END
          fclose(fp);
          return -1;
        }

        size = ftell(fp); // got size
    } else {
        return -1; // file not found!
    }

    char *dict = malloc(size);

    unsigned long i;
    for (i = 0; i <= size; ++i) 
        *(dict + i) = getc(fp);

    for (i = 0; i <= size-1; ++i)
        printf("%c", dict[i]); // changed for better readability

    free(dict);
    fclose(fp);
    return 0;
}

References:

How do you determine the size of a file in C?

Reading a file using argc and argv