7

I can use the strtol function for turning a base36 based value (saved as a string) into a long int:

long int val = strtol("ABCZX123", 0, 36);

Is there a standard function that allows the inversion of this? That is, to convert a long int val variable into a base36 string, to obtain "ABCZX123" again?

anastaciu
  • 23,467
  • 7
  • 28
  • 53
Łukasz Przeniosło
  • 2,725
  • 5
  • 38
  • 74

3 Answers

6

There's no standard function for this. You'll need to write your own one.

Usage example: https://godbolt.org/z/MhRcNA

/* Digit symbols indexed by digit value: '0'-'9' for 0-9, 'A'-'Z' for 10-35,
 * 'a'-'z' for 36-61. sizeof(digits) also counts the terminating NUL (63). */
const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/*
 * Reverses the NUL-terminated string `str` in place.
 *
 * A null pointer or an empty string is returned untouched.
 * Returns `str` so the call can be used inside an expression.
 */
char *reverse(char *str)
{
    if (!str || !str[0]) {
        return str;
    }

    /* Advance `right` to the last character before the terminator. */
    char *right = str;
    while (right[1]) {
        right++;
    }

    /* Swap the outermost pair and walk both cursors towards the middle. */
    for (char *left = str; left < right; left++, right--) {
        char tmp = *right;
        *right = *left;
        *left = tmp;
    }
    return str;
}

/*
 * Writes the textual representation of `num` in the given `base` into `buff`.
 *
 * buff - destination; the caller must provide enough room for every digit,
 *        an optional leading '-' and the terminating NUL. No bounds checking
 *        is performed.
 * num  - value to convert.
 * base - radix; must be at least 2 and smaller than sizeof(digits).
 *
 * Returns `buff` (which may be NULL, in which case nothing is written), or
 * NULL when `base` is out of range.
 */
char *tostring(char *buff, long long num, int base)
{
    if (base < 2 || base >= sizeof(digits)) {
        return NULL;
    }
    if (!buff) {
        return buff;
    }

    const int negative = num < 0;
    char *out = buff;

    /*
     * Emit digits least-significant first. The remainder keeps the sign of
     * `num`, so its magnitude is taken rather than negating `num` itself.
     * do/while guarantees that zero still produces a single '0'.
     */
    do {
        *out++ = digits[abs(num % base)];
        num /= base;
    } while (num);

    if (negative) {
        *out++ = '-';
    }
    *out = 0;

    /* The characters were produced back to front; flip them into place. */
    reverse(buff);
    return buff;
}
0___________
  • 60,014
  • 4
  • 34
  • 74
  • 2
    `base > sizeof(digits)` is incorrect because of the null terminator. The maximum base for this implementation is 62. Note also that it produces output that is not compatible with `strtol` for bases greater than 36. – chqrlie Jan 15 '20 at 14:49
  • 1
    `tostring` should also test for `base >= 2` to avoid undefined behavior. – chqrlie Jan 15 '20 at 14:56
  • Yes this implementation is feisty but legit. After providing all the checks, pointer assertions and reducing the base to a range of from 2 to 36 this works. Well enough for a fast answer, thank you. – Łukasz Przeniosło Jan 15 '20 at 15:06
  • 3
    Since you already have a pointer to the end of the buffer, it would be a bit more efficient to pass that to `reverse()` instead of having it re-find the end. – Toby Speight Jan 15 '20 at 15:31
4

One of the missing attributes of this "Convert long integer to base 36 string" is string management.

The below suffers from a potential buffer overflow when destination is too small.

char *long_to_string(char *destination, long num, int base);

(Assuming 32-bit long) Consider the overflow of below as the resultant string should be "-10000000000000000000000000000000", which needs 34 bytes to encode the string.

char buffer[33];                     // Too small
long_to_string(buffer, LONG_MIN, 2); // Oops! 

An alternative would pass in the buffer size and then provide some sort of error signaling when the buffer is too small.

char* longtostr(char *dest, size_t size, long a, int base)

Since C99, code instead could use a compound literal to provide the needed space - without calling code trying to compute the needed size nor explicitly allocate the buffer.

The returned string pointer from TO_BASE(long x, int base) is valid until the end of the block.

#include <assert.h>
#include <limits.h>
// Buffer size that fits any long in any base >= 2: one character per bit of
// a long (the base-2 worst case), plus a sign and the terminating NUL.
#define TO_BASE_N (sizeof(long)*CHAR_BIT + 2)

// Converts x to base b, using a compound literal as scratch storage, so the
// returned string stays valid until the end of the enclosing block.
#define TO_BASE(x, b) my_to_base((char [TO_BASE_N]){""}, (x), (b))

// Formats `a` in radix `base` (2..36, checked with assert) into `buf`.
//
// `buf` must point to at least TO_BASE_N bytes. The text is built from the
// end of the buffer towards the front, so the returned pointer refers to
// somewhere inside `buf`, not necessarily to its first byte.
char *my_to_base(char *buf, long a, int base) {
  static const char symbols[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  assert(base >= 2 && base <= 36);

  const int negative = a < 0;
  // Work with a non-positive value: the negative range is at least as large
  // as the positive one, so LONG_MIN needs no special handling.
  long rest = negative ? a : -a;

  char *cursor = buf + (TO_BASE_N - 1);
  *cursor = '\0';

  do {
    // rest % base lies in (-base, 0], so negate it to index the table.
    *--cursor = symbols[-(rest % base)];
    rest /= base;
  } while (rest);

  if (negative) {
    *--cursor = '-';
  }

  // A memmove could be added here to shift the text to the start of buf.

  return cursor;
}

#include <limits.h>
#include <stdio.h>
// Demonstrates TO_BASE with several radices, including the LONG_MIN edge case.
int main(void) {
  long ip1 = 0x01020304;
  long ip2 = 0x05060708;
  long ip3 = LONG_MIN;
  // Each call passes three strings, so the format needs three %s directives;
  // with only two, the LONG_MIN conversion was evaluated but never printed.
  printf("%s %s %s\n", TO_BASE(ip1, 16), TO_BASE(ip2, 16), TO_BASE(ip3, 16));
  printf("%s %s %s\n", TO_BASE(ip1, 2), TO_BASE(ip2, 2), TO_BASE(ip3, 2));
  puts(TO_BASE(ip1, 8));
  puts(TO_BASE(ip1, 36));
  puts(TO_BASE(ip3, 10));
}
chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
2

Here is another option that needs no source array of characters, but it is less portable, since not all character encodings have contiguous alphabetic characters (EBCDIC, for example). Test HERE

#include <stdio.h> 
#include <string.h> 
#include <stdlib.h>
#include <stdbool.h>
#include <limits.h>

/*
 * Maps a digit value to its character: 0-9 become '0'-'9', anything else is
 * offset from 'A' (10 -> 'A', 11 -> 'B', ...).
 *
 * The letter branch assumes the execution character set stores 'A'..'Z'
 * contiguously, which holds for ASCII but not for EBCDIC.
 */
char get_chars(long long value)
{
    const int is_decimal_digit = (value >= 0 && value <= 9);

    return is_decimal_digit ? value + '0' : value - 10 + 'A';
}

/*
 * Reverses the NUL-terminated string `str` in place by swapping characters
 * pairwise from both ends until the two positions meet in the middle.
 */
void reverse_string(char *str)
{
    int left = 0;
    int right = (int)strlen(str) - 1;

    while (left < right) {
        char swapped = str[left];
        str[left] = str[right];
        str[right] = swapped;
        left++;
        right--;
    }
}

/*
 * Writes the representation of `input` in radix `base` into `res`.
 *
 * res   - destination buffer; the caller must supply room for every digit,
 *         an optional leading '-' and the terminating NUL (66 bytes covers a
 *         64-bit long long in base 2).
 * base  - radix, 2..36. get_chars() only yields '0'-'9' and 'A'-'Z', and a
 *         base below 2 would divide by zero or never terminate.
 * input - value to convert; the whole long long range, including LLONG_MIN,
 *         is supported.
 *
 * Returns `res`, or NULL if `res` is NULL or `base` is out of range.
 */
char* convert_to_base(char *res, int base, long long input) 
{ 
    if (res == NULL || base < 2 || base > 36) {
        return NULL;
    }

    bool negative = input < 0;
    int index = 0;

    /*
     * Do the arithmetic on the non-positive side. Negating a non-negative
     * value is always representable, whereas llabs(LLONG_MIN) is undefined
     * behaviour because |LLONG_MIN| exceeds LLONG_MAX.
     */
    if (!negative) {
        input = -input;
    }

    /* do/while emits a single '0' for zero without a special case. */
    do {
        /* input % base lies in (-base, 0]; negate it to get the digit value. */
        res[index++] = get_chars(-(input % base));
        input /= base;
    } while (input != 0);

    if (negative) {
        res[index++] = '-';
    }
    res[index] = '\0';

    /* Digits were produced least-significant first; put them in order. */
    reverse_string(res);
    return res;
} 

/*
 * Reads one integer from standard input and prints it in base 36.
 * Exits with EXIT_FAILURE when the input cannot be used.
 */
int main() {  
    long long input = 0;
    printf("** Integer to Base-36 **\n ");
    printf("Enter a valid number: ");

    /* scanf returns the number of items converted; anything but 1 means the
       text was not a number (or input ended), so `input` must not be used. */
    if (scanf("%lld", &input) != 1) {
      printf("Invalid number");
      return EXIT_FAILURE;
    }

    /* Treat the extreme values as out of range. The original test joined the
       two comparisons with &&, which can never be true, so it never fired. */
    if (input >= LLONG_MAX || input <= LLONG_MIN) {
      printf("Invalid number");
      return EXIT_FAILURE;
    }

    int base = 36; 
    char res[100]; 
    printf("%lld -> %s\n", input, convert_to_base(res, base, input));

    return 0; 
}
anastaciu
  • 23,467
  • 7
  • 28
  • 53
  • It's not portable to just add to `'A'` like that - not all character encodings have contiguous alphabetic characters. – Toby Speight Jan 15 '20 at 15:33
  • 1
    The best-known example is [EBCDIC](https://en.wikipedia.org/wiki/EBCDIC), where `'J' = 'I' + 8` and `'S' = 'R' + 9`. – Toby Speight Jan 15 '20 at 15:44
  • @TobySpeight, thanks, always nice to learn something, I'll make sure to include that in my answer. – anastaciu Jan 15 '20 at 15:47
  • @anastaciu what about the sign? Your code fails miserably https://godbolt.org/z/7298e5 – 0___________ Jan 15 '20 at 16:15
  • @P__J__ what would I do without you, I corrected it and threw in the 0 for good measure. I even used your online compiler of choice. Let me know what you think. And... If you correct the issues in your code I'll even throw in an upvote. – anastaciu Jan 15 '20 at 17:38
  • @anastaciu It will not work for maximum (or rather minimum)negative value. It cannot be accommodated as a positive number – 0___________ Jan 15 '20 at 19:38
  • @P__J__, what do you mean? If it overflows it doesn't work, but [neither does yours](https://godbolt.org/z/W7DheL). – anastaciu Jan 15 '20 at 20:05
  • @anastaciu no. I do not change the sign of the input number. You do. In two's complement, the absolute value of the minimum negative number is larger than the maximum positive number of the same size. Take a look at the range of the signed types. Mine will work – 0___________ Jan 15 '20 at 20:08
  • @P__J__ that is a marginal issue, it just doesn't convert the limits.. – anastaciu Jan 15 '20 at 20:20
  • @anastaciu programs usually do not work for some data. As yours – 0___________ Jan 15 '20 at 23:19
  • `llabs(input)` is UB when `input == LLONG_MIN`. `get_chars(input % base)` also will fail. – chux - Reinstate Monica Jan 16 '20 at 04:02
  • @chux-ReinstateMonica, if it overflows it's UB for sure, but using the [limits](http://www.cplusplus.com/reference/climits/) it works, as is demonstrated [here](https://godbolt.org/z/zW5LZ8). As for the `get_chars` failure, I can't quite see it, except for the encoding issues; can you explain? – anastaciu Jan 16 '20 at 08:43
  • The UB of `llabs(LLONG_MIN)` is often `llabs(LLONG_MIN) --> LLONG_MIN`. Then `LLONG_MIN % base` is some negative number. `get_chars(negative number)` does not yield a desired result. – chux - Reinstate Monica Jan 16 '20 at 14:24
  • @chux-ReinstateMonica, Thanks for your response, I would argue that that is a question of controlling the input like [this](https://godbolt.org/z/8WzMoa). – anastaciu Jan 16 '20 at 15:05
  • Perhaps, yet I disagree. OP's goal is a "Convert long integer" function, not read input from user as restricted range. The goal is a full `long` range solution. IAC, providing a full range solution, including `INT_MIN` is not so difficult. [Example](https://stackoverflow.com/a/56404008/2410359) uses the negative side as there is more room there. In this case, code could use `if(input > 0){ input = -input; }`. Many possibilities. – chux - Reinstate Monica Jan 16 '20 at 15:29
  • 1
    @chux-ReinstateMonica It's a nice solution, I would recommend it with the necessary change to long int. Lesson - search Stack Overflow for a solution to your problem before asking a question. Just to finish my thoughts, as you correctly point out the OP is about converting long int to base-36, so providing code that allows for the long long type is well within the specs of the problem. Of course your solution is cleaner. Nice exchange, looking forward to hearing more from you. – anastaciu Jan 16 '20 at 18:12