1

This is one of those, "I know there is a better way", questions.

The below code is from a larger project I am working on, and the helper function to convert a hex value to a binary string is buried deep in the code.

Essentially, I want to make sure that the char array pointed to by *return_string has sufficient space for the binary string generated by the function.

As it stands, if I don't allocate enough memory, I get a SIGABRT. It would be far more elegant if I just returned -1.

The research I have done tells me that what I am trying to do is a bit tricky, since there is really no way to bounds check an empty array via its pointer.

For reasons that I don't understand, if I use the common macro:

#define ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0]))

On *return_string, I always get the same value. I am new to C - I am sure I am missing something elementary here.

Anyway, here's the code. What do you all recommend? Thank you!

(FYI - I know that as written, I do not have enough memory allocated)

(FYI - I also know that this is not the fastest implementation of a hex to binary conversion. O(n) is good enough for this particular application)

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int hex_string_to_bin_string(char *return_string, size_t bin_length, const char *input);

int main(int argc, const char * argv[]) {

    char return_string1[12];
    char return_string2[12];
    char return_string3[12];

    char input1[8] = "0x4708";
    char input2[8] = "4708";
    char bad_input[8] = "47Q8";

    fprintf(stderr, "answer should be:\t\t\t 0100011100001000\n");

    //test string 1
    if(hex_string_to_bin_string(return_string1, 16, input1) < 0 ) {

        fprintf(stderr, "FAILED return string 1\n");

    } else {

        fprintf(stderr, "return string 1:\t\t\t %s\n", return_string1);
    }

    //test string 2
    if(hex_string_to_bin_string(return_string2, 16, input2) < 0 ) {

        fprintf(stderr, "FAILED return string 2\n");

    } else {

        fprintf(stderr, "return string 2:\t\t\t %s\n", return_string2);
    }

    //test bad input
    if(hex_string_to_bin_string(return_string3, 16, bad_input) < 0) {
        fprintf(stderr, "PASSED bad character test\n");
    }

    else {

        fprintf(stderr, "FAILED bad character test\n");
    }

    return 0;
}

int hex_string_to_bin_string(char *return_string, size_t bin_length, const char *input) {

    /*
     0 - 0000
     1 - 0001
     2 - 0010
     3 - 0011
     4 - 0100
     5 - 0101
     6 - 0110
     7 - 0111
     8 - 1000
     9 - 1001
     A - 1010
     B - 1011
     C - 1100
     D - 1101
     E - 1110
     F - 1111
    */

    //check input to make sure length is good
    if(strlen(input) == 0 ) {

        return -1;
    }

    //check for leading '0x' and strip if necessary
    if(input[0] == '0' && input[1] == 'x') {
        input += 2;
    }

    //loop through input and replace
    int i;

    char bits[5];

    for(i=0; i<strlen(input); i++) {

        switch(input[i]) {

            case '0': {

                if( snprintf(bits, sizeof(bits), "0000") < 0) {

                    return -1;
                }

                break;
            }
            case '1': {

                if( snprintf(bits, sizeof(bits), "0001") < 0) {

                    return -1;
                }

                break;
            }
            case '2': {

                if( snprintf(bits, sizeof(bits), "0010") < 0) {

                    return -1;
                }

                break;
            }
            case '3': {

                if( snprintf(bits, sizeof(bits), "0011") < 0) {

                    return -1;
                }

                break;
            }
            case '4': {

                if( snprintf(bits, sizeof(bits), "0100") < 0) {

                    return -1;
                }

                break;
            }
            case '5': {

                if( snprintf(bits, sizeof(bits), "0101") < 0) {

                    return -1;
                }

                break;
            }
            case '6': {

                if( snprintf(bits, sizeof(bits), "0110") < 0) {

                    return -1;
                }

                break;
            }
            case '7': {

                if( snprintf(bits, sizeof(bits), "0111") < 0) {

                    return -1;
                }

                break;
            }
            case '8': {

                if( snprintf(bits, sizeof(bits), "1000") < 0) {

                    return -1;
                }

                break;
            }
            case '9': {

                if( snprintf(bits, sizeof(bits), "1001") < 0) {

                    return -1;
                }

                break;
            }
            case 'A': {

                if( snprintf(bits, sizeof(bits), "1010") < 0) {

                    return -1;
                }

                break;
            }
            case 'B': {

                if( snprintf(bits, sizeof(bits), "1011") < 0) {

                    return -1;
                }

                break;
            }
            case 'C': {

                if( snprintf(bits, sizeof(bits), "1100") < 0) {

                    return -1;
                }

                break;
            }
            case 'D': {

                if( snprintf(bits, sizeof(bits), "1101") < 0) {

                    return -1;
                }

                break;
            }
            case 'E': {

                if( snprintf(bits, sizeof(bits), "1110") < 0) {

                    return -1;
                }

                break;
            }
            case 'F': {

                if( snprintf(bits, sizeof(bits), "1111") < 0) {

                    return -1;
                }

                break;
            }

            default: {

                return -1;
            }
        }

        if(i == 0) {
            snprintf(return_string, bin_length+1, "%s", bits);
        }

        else {
            snprintf(return_string, bin_length+1, "%s%s", return_string, bits);
        }
    }

    return 0;
}
John Davis
  • 223
  • 1
  • 6
  • 2
    `sizeof return_string` tells you the size of `return_string`, which is a pointer. Not the size of what it might be pointing to. – M.M Oct 30 '15 at 12:18
  • 2
    You can't `snprintf` a string onto itself; That's [undefined behaviour](http://stackoverflow.com/questions/1283354/is-sprintfbuffer-s-buffer-safe). – M Oehm Oct 30 '15 at 12:27
  • 1
    Related: http://stackoverflow.com/q/492384/694576 – alk Oct 30 '15 at 17:15
  • Thanks @alk and @M.M! Great knowledge from both. – John Davis Oct 30 '15 at 18:27

2 Answers2

3

You must pass the size of the output buffer as an argument.

Since the type of the argument is char *, there is no way to magically compute the number of characters available to store there. That information is simply not present, which is why you as the programmer must take responsibility for passing it into the function.

unwind
  • 391,730
  • 64
  • 469
  • 606
2

I recommend that the conversion function's interface be changed to include a length for the buffer receiving the text.

That is the approach for the secure versions of the various string related functions of the C Standard Library such as sprintf_s().

In C a text string is really an array of characters. The text string is terminated by a binary zero to indicate the end of the string however this is not something that is enforced by the compiler other than for constant strings such as a definition like char textStuff[] = "this is text"; which will create an array textStuff that has enough array elements to contain the twelve text characters this is text as well as the thirteenth character of a terminating binary zero for the end of string.

Languages such as Java use resource handles rather than pointers so additional information such as array lengths is available at run time. And other languages have an actual text string type.

At the point where an array is defined, the size of the array is known by the compiler so to find the number of elements in the array you can do something like sizeof(textStuff)/sizeof(textStuff[0]) to determine the number of elements by dividing the total size of the array by the size of the first element of the array.

However once a function is called specifying the array in the interface as an array of unknown size or as a pointer, the array essentially becomes transformed into a pointer and any size information that was available at the point in the source where the array was defined is no longer available to the compiler.

Richard Chambers
  • 16,643
  • 4
  • 81
  • 106