1

I'm making an upload form via a CGI interface. I'm writing it in C and don't want to use any outside libraries (ie. cgic).

I thought the program was complete, as the first test files uploaded correctly, but they were ASCII files. When I tested with a binary file (a JPG), it seemed that STDIN was trying to read the binary data as ASCII, which creates a problem for characters like \0, which is present at the end of an ASCII file but is a common character in binary files. Uploading a 1.9MB file results in a 38kB file.

When searching how to change the STDIN stream to binary, I was referred to the command freopen and told to use NULL as the argument for the file. example 1

It says:

If filename is a null pointer, the freopen() function shall attempt to change the mode of the stream to that specified by mode, as if the name of the file currently associated with the stream had been used. In this case, the file descriptor associated with the stream need not be closed if the call to freopen() succeeds. It is implementation-defined which changes of mode are permitted (if any), and under what circumstances.

But when I check the man page on my system with man 3 freopen, it doesn't say any of this at all. Furthermore, reading the man page, I find that the option for binary (adding 'b' to the mode) is no longer recognized and exists only for compatibility with older standards:

The mode string can also include the letter 'b' either as a last character or as a character between the characters in any of the two-character strings described above. This is strictly for compatibility with C89 and has no effect; the 'b' is ignored on all POSIX conforming systems, including Linux.

So right now I'm completely lost. How can I change the STDIN stream to read binary input?

Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>

// Declare constants.
// BUF_SIZE: number of bytes main() requests from stdin per fread() call.
#define BUF_SIZE                 4096
// FILENAME_SIZE: capacity of the global filename buffer, not counting the terminator.
#define FILENAME_SIZE             500
// MARKER_SIZE: capacity reserved for the stored multipart boundary line, not counting the terminator.
#define MARKER_SIZE               100
// RETURN_FAILURE / RETURN_SUCCESS: not referenced by the code in this file.
#define RETURN_FAILURE              0
#define RETURN_SUCCESS              1
// SEARCH_STRING_1: text that immediately precedes the uploaded file's name in the part headers.
#define SEARCH_STRING_1             "filename=\""
// SEARCH_STRING_2: the empty line (CRLF CRLF) after which the file data begins.
#define SEARCH_STRING_2             "\r\n\r\n"

// Declare global variables.
// filename: name of the output file, taken from the upload's part headers.
char filename[FILENAME_SIZE + 1];
// program_name: set in main() from basename(argv[0]) and prefixed to every diagnostic.
char *program_name;

// Declare function prototype.
// print_footer / print_header: emit the closing message and the CGI response header.
void print_footer (void);
void print_header (void);
// process_input: handles one block of uploaded data; a NULL argument closes the output file.
void process_input (char *data);

int main (int argc, char *argv[])
{
// Declare variables.
    long long ret;
    char buf[BUF_SIZE + 1];

// Get program name for error reporting.
    program_name = basename(argv[0]);

// Prepare output for browser.
    print_header();

// Protect variable against buffer overflow.
    buf[BUF_SIZE] = '\0';

// Loop through all the file data.
    while(1)
    {
// Read in the next block of data.
        if((ret = (long long) fread(buf, 1, BUF_SIZE, stdin)) != BUF_SIZE)
        {
// Check for error.
            if(ferror(stdin) != 0)
            {
                printf("%s: An error occurred while reading the input file.<br>\n", program_name);
                process_input(NULL);
                exit(EXIT_FAILURE);
            }
// Check for EOF.
            else if(feof(stdin) != 0)
                break;
        }

// Terminate and process uploaded data.
        buf[ret] = '\0';
        process_input(buf);
    }

// Terminate and process uploaded data.
    buf[ret] = '\0';
    process_input(buf);

// Finish user output, close output file and exit.
    print_footer();
    process_input(NULL);
    exit(EXIT_SUCCESS);
}

void process_input (char *data)
{
// Declare variables.
    char *ptr1= NULL;
    char *ptr2;
    int x = 0;
    static FILE *fp;
    static int flag = 0;
    static char marker[MARKER_SIZE + 1];

// If data is NULL, close output file.
    if(data == NULL)
    {
        if(fclose(fp) == EOF)
        {
            printf("%s: process_input: close failed (%s)<br>\n", program_name, strerror(errno));
            exit(EXIT_FAILURE);
        }

        return;
    }

// Check if this is the first time through.
    if(flag == 0)
    {
// Get marker.
        if((ptr1 = strchr(data, '\n')) == NULL)
        {
            printf("%s: process_input: strchr(1) failed (\n)<br>\n", program_name);
            exit(EXIT_FAILURE);
        }

        ptr1[0] = '\0';
        strcpy(marker, data);
        ptr1[0] = '\n';

// Get filename.
        if((ptr1 = strstr(data, SEARCH_STRING_1)) == NULL)
        {
            printf("%s: process_input: strstr(1) failed (%s)<br>\n", program_name, SEARCH_STRING_1);
            exit(EXIT_FAILURE);
        }

// Advance pointer to start of filename.
        ptr1 += 10;

// Find end of filename.
        if((ptr2 = strchr(ptr1, '"')) == NULL)
        {
            printf("%s: process_input: strchr(2) failed (\")<br>\n", program_name);
            exit(EXIT_FAILURE);
        }

// Terminate and store filename.
        ptr2[0] = '\0';
        strcpy(filename, ptr1);
        ptr2[0] = '"';

// Remove spaces from filename.
        while(filename[x] != '\0')
        {
            if(filename[x] == ' ')
                filename[x] = '.';

            x++;
        }

// Open output file.
        if((fp = fopen(filename, "wb")) == NULL)
        {
            printf("%s: process_input: fopen failed (%s) (%s)<br>\n", program_name, strerror(errno), filename);
            exit(EXIT_FAILURE);
        }

// Find start of file data.
        if((ptr1 = strstr(data, SEARCH_STRING_2)) == NULL)
        {
            printf("%s: process_input: strstr(2) failed (%s)<br>\n", program_name, SEARCH_STRING_2);
            fclose(fp);
            exit(EXIT_FAILURE);
        }

// Set flag.
        flag++;
// Advance pointer to start of file data.
        ptr1 += 4;

// Change STDIN stream to binary.
        if(freopen(NULL, "rb", stdin) == NULL)
        {
            printf("%s: process_input: freopen failed (%s)<br>\n", program_name, strerror(errno));
            fclose(fp);
            exit(EXIT_FAILURE);
        }
    }
// Catch everything else.
    else
    {
        ptr1 = data;

        if((ptr2 = strstr(ptr1, marker)) != NULL)
            ptr2[0 - 2] = '\0';
    }

// Write file data.
    if(fwrite(ptr1, 1, strlen(ptr1), fp) != strlen(ptr1))
    {
        printf("%s: process_input: write failed (%s)<br>\n", program_name, strerror(errno));
        fclose(fp);
        exit(EXIT_FAILURE);
    }
}

/* Emit the closing line of the response body. */
void print_footer (void)
{
    static const char closing[] = "\nMade it!\n";

    fputs(closing, stdout);
}

/* Emit the CGI response header followed by the blank line that ends it. */
void print_header (void)
{
    static const char header[] = "Content-type: text/plain\r\n\r\n";

    fputs(header, stdout);
}
Community
  • 1
  • 1
Deanie
  • 2,316
  • 2
  • 19
  • 35
  • It is not the stream, it's your code. All file handles in Linux and other POSIXy systems are always "binary"; they do not massage the stream contents at all. You should be using `fread()` to read the POST data, not `fgets()` or `getline()` or `getdelim()`, because POST data is inherently binary, not text lines. – Nominal Animal Apr 15 '17 at 16:51
  • @NominalAnimal I am using `fread` to read STDIN. – Deanie Apr 15 '17 at 16:53
  • Do you also use `fwrite` to write the de-escaped (if of MIME type application/x-www-form-urlencoded) or delimited (if of MIME type multipart/form-data) data to your output file? None of the CGI implementations I've used on POSIXy systems have any issues with any specific characters, you see. The issue is definitely in your code -- which you have chosen not to show --, not in any library function. – Nominal Animal Apr 15 '17 at 16:57
  • @NominalAnimal Yes, I use fwrite to write the data to file. I've added my code above. – Deanie Apr 15 '17 at 17:10
  • You treat the `fread()` data as a string (by using `strstr()`, `strlen()`, and so on). That does not work, because the first nul character (`'\0'`) they encounter they consider the end of the data. *The data is not a string.* This is why you only process the initial part of the POST data block. – Nominal Animal Apr 15 '17 at 19:04
  • @NominalAnimal TYVM – Deanie Apr 15 '17 at 19:06
  • You might wish to take a look at e.g. [qdecoder](http://qdecoder.org/) sources to see how the existing implementations handle POST data. However, [FastCGI](https://en.wikipedia.org/wiki/FastCGI) is in some ways even easier to implement (than CGI). – Nominal Animal Apr 15 '17 at 20:16

2 Answers2

0

Ok, it appears what @NominalAnimal said was correct. You can store binary data in a string, but the moment you use any function in the string.h library, it almost always changes what is stored in that string (if the data is binary).

The easy solution is to make a separate function that takes a pointer to the binary data and do your string searches in that function, returning what pertinent information is needed. That way, the original data is never changed.

Deanie
  • 2,316
  • 2
  • 19
  • 35
-1

'stdin' is a macro of STDIN_FILENO, which is equal to 0. See also 'unistd.h'. You are not showing your code, but I think you stop when you encounter a '\0' or a non-ASCII char, since you said you were using 'fread()'.

You have to stop when the fread() function returns 0, which means it stopped reading: it encountered EOF.

tfontain
  • 66
  • 12
  • The name `stdin` is not really related to the POSIX name STDIN_FILENO. They both refer to standard input, but in very different ways. – Jonathan Leffler Apr 15 '17 at 17:14
  • I don't believe this to be the case. I never test for `'\0'`, it's all based on the return value of `fread` and whether the `FEOF` and `FERROR` flags have been set. – Deanie Apr 15 '17 at 17:15