-3
/**
 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

#include "base64.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

 // base64 tables
static const char basis_64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const signed char index_64[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62,
    -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0,
    1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1 };
#define CHAR64(c) (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])

// base64_encode    :    base64 encode
//
// value            :    data to encode
// vlen             :    length of data
// (result)         :    new char[] - c-str of result
char* base64_encode(const unsigned char* value, size_t vlen) {
    char* result = (char*)malloc((vlen * 4) / 3 + 5);
    if (result == NULL) {
        return NULL;
    }
    char* out = result;
    while (vlen >= 3) {
        *out++ = basis_64[value[0] >> 2];
        *out++ = basis_64[((value[0] << 4) & 0x30) | (value[1] >> 4)];
        *out++ = basis_64[((value[1] << 2) & 0x3C) | (value[2] >> 6)];
        *out++ = basis_64[value[2] & 0x3F];
        value += 3;
        vlen -= 3;
    }
    if (vlen > 0) {
        *out++ = basis_64[value[0] >> 2];
        unsigned char oval = (value[0] << 4) & 0x30;
        if (vlen > 1)
            oval |= value[1] >> 4;
        *out++ = basis_64[oval];
        *out++ = (vlen < 2) ? '=' : basis_64[(value[1] << 2) & 0x3C];
        *out++ = '=';
    }
    *out = '\0';

    return result;
}

// base64_decode    :    base64 decode
//
// value            :    c-str to decode
// rlen             :    length of decoded result
// (result)         :    new unsigned char[] - decoded result
unsigned char* base64_decode(const char* value, size_t* rlen) {
    *rlen = 0;
    int c1, c2, c3, c4;

    size_t vlen = strlen(value);
    unsigned char* result = (unsigned char*)malloc((vlen * 3) / 4 + 1);
    if (result == NULL) {
        return NULL;
    }
    unsigned char* out = result;

    while (1) {
        if (value[0] == 0) {
            //*out = '\0';
            return result;
        }
        c1 = value[0];
        if (CHAR64(c1) == -1) {
            goto base64_decode_error;
            ;
        }
        c2 = value[1];
        if (CHAR64(c2) == -1) {
            goto base64_decode_error;
            ;
        }
        c3 = value[2];
        if ((c3 != '=') && (CHAR64(c3) == -1)) {
            goto base64_decode_error;
            ;
        }
        c4 = value[3];
        if ((c4 != '=') && (CHAR64(c4) == -1)) {
            goto base64_decode_error;
            ;
        }

        value += 4;
        *out++ = (CHAR64(c1) << 2) | (CHAR64(c2) >> 4);
        *rlen += 1;

        if (c3 != '=') {
            *out++ = ((CHAR64(c2) << 4) & 0xf0) | (CHAR64(c3) >> 2);
            *rlen += 1;

            if (c4 != '=') {
                *out++ = ((CHAR64(c3) << 6) & 0xc0) | CHAR64(c4);
                *rlen += 1;
            }
        }
    }

base64_decode_error:
    *result = 0;
    *rlen = 0;

    return result;
}
// Below is my test.
int main() {
    char str[] = "123456";
    char* encoded = base64_encode(str, strlen(str));
    printf("encode : %s\n", encoded);
    int rlen = 3;
    char* decoded = base64_decode(encoded, &rlen);
    printf("decode : %s\n", decoded);
    printf("len: %d\n", strlen(decoded));
    return 0;
}

Result (cl.exe) : encode : MTIzNDU2 decode : 123456? len: 7

Then I thought these code may work in unix, and the compiler may be cc(gcc). So I compiled these code in my WSL with gcc, and it worked correctly.

Result (gcc) : encode : MTIzNDU2 decode : 123456 len: 6

Why gcc is correct but cl is not?

What's the difference between these two compiler in this case?

sepp2k
  • 363,768
  • 54
  • 674
  • 675
Zmz
  • 3
  • 2
  • 4
    Your compiler warnings should have pointed out at least one way in which your test code is broken. (If you're compiling with warnings off, turn them on.) – user2357112 May 26 '23 at 10:22
  • Perhaps `base64.h` uses the `long` type expecting it to have 64 bits. With MSVC it is 32 bits. – Weather Vane May 26 '23 at 10:27
  • [Undefined, unspecified and implementation-defined behavior](https://stackoverflow.com/questions/2397984/undefined-unspecified-and-implementation-defined-behavior) – Jesper Juhl May 26 '23 at 10:34
  • As pointed out by the answer below this is a case of incorrect expectations (sometimes known as RTFM). You expected `base64_decode` to return a null terminated string, but it does not. So the code you found is not bugged, the code you wrote might have worked in different circumstances, but in this case it's a bugged program. – john May 26 '23 at 10:36
  • 3
    Also worth saying that base64 is often used to encode binary data, so there is no particular reason to expect the output of a base64 decoding routine to be null terminated. – john May 26 '23 at 10:40
  • Replace your *malloc* by *calloc*. – CristiFati May 26 '23 at 10:41

2 Answers2

6

Both compilers are correct. The shown code results in undefined behavior. base64_decode() fails to return a '\0'-terminated string, so both using the returned char pointer with printf and strlen results in undefined behavior.

Sam Varshavchik
  • 114,536
  • 5
  • 94
  • 148
1

There's a reason that base64_decode returns a length to you. If (but only if) you know that the encoded text had been human-readable, you could print it back out like this:

int rlen;
char* decoded = base64_decode(encoded, &rlen);
printf("decode : %.*s\n", rlen, decoded);
printf("len: %d\n", rlen);

%.*s tells printf that the length of the string you're printing is given, not by the usual \0 terminator in the string, but rather, by an explicit length you pass in.

As mentioned in the comments, Base-64 encoding is usually used to encode arbitrary binary data, which might contain embedded \0 characters, and which typically isn't null-terminated. For arbitrary binary data, you usually want to carry the length around as a separate variable, like rlen here.

Another issue is that this base64_decode function is returning the length as a size_t value, not int. So you really need:

size_t rlen;
char* decoded = base64_decode(encoded, &rlen);
printf("decode : %.*s\n", (int)rlen, decoded);
printf("len: %zd\n", rlen);
Steve Summit
  • 45,437
  • 7
  • 70
  • 103
  • Yes. My thought was wrong. `base64_decode()` change `rlen` . That means I should use it when producing decoded data. For example, base64 usually be used to encode image. When I decode a image, I don't want a `\0` at the end. I should use `rlen` to get the edge of data. – Zmz May 27 '23 at 02:00