-1

so I have this code, goal is to have a void *data pointer, which is sometimes used to store a simple int, sometimes a single char array and sometimes I need to store an array of char arrays. I'm making sure that I always know what type of data I have stored in the void pointer.

The code executes well in an online parser and this is the output of it:

sizeof 2 x char*: 8

str0: string1 addr: 2995278544

str1: bla2 addr: 2995278576

checking strings:

str0: string1 addr: 2995278544

str1: bla2 addr: 2995278576

The plan was to malloc space for n char* pointers and save that pointer do void *data. Then change the type to "char **ptr" (pointer to a pointer), so I can save the addresses which strdup returns to that array and access them later. checkItems(uint8_t) does exactly that, it re-accesses the "void *data" pointer by changing it to "char **ptr" again to be able to access the memory addresses where the actual C strings are saved.

is this all correct? would one do this differently? should I use some kind of cast to the void *data pointer instead simply saying "char **ptr = data;"?

Thanks!

#include<stdio.h>
#include<stdint.h>
#include<string.h>
#include<stdlib.h>
#include<stdarg.h>

void copyItems(uint8_t num, ...);
void checkItems(uint8_t num);

void *data;

int main()
{
    copyItems(2, "string1", "bla2");
    checkItems(2);
}

void copyItems(uint8_t num, ...)
{
    printf("sizeof %u x char*: %u\r\n", num, sizeof(char*), sizeof(char*)*num);
    data = malloc(sizeof(char*)*num);
    
    char **ptr = data;
    
    va_list ap;
    va_start(ap, num);
    
    for (uint8_t n = 0; n < num; n++)
    {
        ptr[n] = strdup(va_arg(ap, char*));
        printf("str%u: %s addr: %u\r\n", n, ptr[n], ptr[n]);
    }
    
    va_end(ap);
}

void checkItems(uint8_t num)
{
    char **ptr = data;
    
    printf("checking strings:\r\n");
    
    for (uint8_t n = 0; n < num; n++)
    {
        printf("str%u: %s addr: %u\r\n", n, ptr[n], ptr[n]);
    }
}
mrflash818
  • 930
  • 13
  • 24
azzurro
  • 1
  • 2
  • *"I'm making sure that I always know what type of data I have stored in the void pointer."* Where? I would have expected that information to be contained in a `struct` along with the pointer, *and* the number of items stored. – Weather Vane Dec 16 '21 at 23:12
  • yes, in fact the void pointer is contained within a struct where all the info is stored (including the number of items, of course). as I'm not having any issues with that, I wanted to keep the example as simple as possible. – azzurro Dec 16 '21 at 23:14
  • You shouldn't (need to) cast a void pointer, please see [Do I cast the result of malloc?](https://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc) – Weather Vane Dec 16 '21 at 23:16
  • i figured that I can also say ((char**)data)[n] instead of creating char **ptr. question stays, whether this is allowed, correct and good practice, or just pure BS... – azzurro Dec 16 '21 at 23:17
  • If you are *dereferencing* the `void*` pointer, then you must cast it. For assigning to another pointer type, no. – Weather Vane Dec 16 '21 at 23:19
  • @WeatherVane if I just try to access the array element via data[n], where data is the void pointer, I get a "dereferencing ‘void *’ pointer" error. – azzurro Dec 16 '21 at 23:20
  • Which supports what I wrote. – Weather Vane Dec 16 '21 at 23:21
  • If I were to store 1 out of 3 types in a variable, I would use a `union`. – BoP Dec 16 '21 at 23:21
  • yes, we overlapped here. so is this correct how I did it? it seems to me but I'd like to be more confident. do you or does anyone have a thread or an article handy where a similar thing has been discussed/explained? Thanks! – azzurro Dec 16 '21 at 23:23

2 Answers2

0

I would store size and type information in the structure

typedef enum
{
    INT, 
    CHAR,
    CHARPTR,
}DATA_TYPE;

typedef struct 
{
    DATA_TYPE dtype;
    size_t size;
    void *data;
}DATA;

DATA *copyItems(size_t num, DATA_TYPE type, ...);
void PrintItems(const DATA *data);

size_t datasize(const DATA *data)
{
    size_t result;
    switch(data -> dtype)
    {
        case INT:
            result = sizeof(int);
            break;
        case CHAR:
            result = 1;
            break;
        case CHARPTR:
            result = sizeof(char *);
            break;
        default: 
            result = 0;
            break;
    }
    return result;
}




int main()
{
    DATA *data =  copyItems(2, CHARPTR, "string1", "bla2");
    PrintItems(data);
}

DATA *copyItems(size_t num, DATA_TYPE type, ...)
{
    DATA *data = malloc(sizeof(*data));
        
    va_list ap;
    va_start(ap, type);
    
    if(data)
    {
        data -> size = 0;
        data -> data = malloc(datasize(data) * num);
        data -> dtype = type;
        if(data -> data)
        {
            for (size_t n = 0; n < num; n++, data -> size++)
            {
                switch(data -> dtype)
                {
                    case INT:
                        ((int *)data -> data)[n] = va_arg(ap, int);
                        break;
                    case CHARPTR:
                        ((char **)data -> data)[n] = strdup(va_arg(ap, char *));
                        break;
                    default:
                        break;
                }
            }
        }
        else
        { /* error handler */}
    }
    va_end(ap);
    return data;
}

void PrintItems(const DATA *data)
{
    if(data && data -> size && data -> data)
    {
        for(size_t i = 0; i < data -> size; i++)
        {
            switch(data -> dtype)
            {
                case INT:
                    printf("[%zu] = %d\n", i, ((int *)data -> data)[i]);
                    break;
                case CHARPTR:
                    printf("[%zu] = %s\n", i, ((char **)data -> data)[i]);
                    break;
                default:
                    break;
            }
        }
    }
}

https://godbolt.org/z/PjWhrGvvP

0___________
  • 60,014
  • 4
  • 34
  • 74
  • thank you, I actually already have a struct where the void pointer is stored, and I very likely would have saved the necessary infos like you did, alongside in the struct. in any case this gives me approval that this method is ok and allowed/working. however I'm curious what you are thinking about the Union approach of @arfneto. which approach is more betterer or more common/best practice? thanks! – azzurro Dec 17 '21 at 09:14
0

This is a sort of variant record, present in C and many languages like Pascal, and maybe you could stick to the usual.

In C we can use anonymous unions to this effect.

I will show you a short example.

Note that I will not implement here the logic to copy the strings to a new vector, and it is important for safety. Please comment if you think you need an example.

The data structure example

typedef struct
{
    unsigned char id;  // 1,2,3
    union
    {
        unsigned char the_char;
        unsigned char size;  // up to 255 strings
    };
    union
    {
        int          the_int;
        const char** argv;
    };

} Data;

The reason for the 1st union is described below, but it also helps to keep the size of Data even and it is useful sometimes, for alignment purposes.

The 2nd union is optional and exists for the non-char cases, so the minimum size of Data is 2 bytes.

Here we use id as 1 for char, 2 for int and 3 for a vector of strings.

  • When holding a single char it follows the id so we use just 2 bytes

  • When holding an int it follows these 2 bytes

  • When using strings the code has 2 options:

    • for up to 255 strings the count is put at the size component, giving an alternate meaning for the 2nd byte of Data.
    • When size is 0 the vector is assumed to be null-terminated and this is useful when there can be a huge number of strings that would overflow size

output


char: '?'

int: Value is 42

array of strings: [4 in total]

        #1      an
        #2      array
        #3      of
        #4      strings


array of strings [null-terminated]:

        #1      an
        #2      array
        #3      of
        #4      strings
        #5      ends here

5 strings were found...

the code

#include <iso646.h>
#include <stdio.h>

typedef struct
{
    unsigned char id;  // 1,2,3
    union
    {
        unsigned char the_char;
        unsigned char size;  // up to 255 strings
    };
    union
    {
        int          the_int;
        const char** argv;
    };

} Data;

int check_one(Data*);
int valid_id(Data*);

int main(void)
{
    const char* words[] = {"an",      "array",     "of",
                           "strings", "ends here", NULL};

    Data data[4];

    data[0].id       = 1;
    data[0].the_char = '?';

    data[1].id      = 2;
    data[1].the_int = 42;

    data[2].id   = 3;
    data[2].size = 4;
    data[2].argv = words;

    data[3].id   = 3;
    data[3].size = 0;  // signals the array as null-terminated
    data[3].argv = words;

    for (int i = 0; i < 4; i += 1) check_one(&data[i]);

    return 0;
}

int check_one(Data* d)
{
    const char* what[] = {NULL, "char", "int", "array of strings"};
    if (d == NULL) return -1;
    if (not valid_id(d)) return -2;
    switch (d->id)
    {
        case 1:
            printf("\n%s: '%c'\n", what[d->id], d->the_char);
            break;
        case 2:
            printf("\n%s: Value is %d\n", what[d->id], d->the_int);
            break;
        case 3:
            if (d->size != 0)
            {  // known size
                printf("\n%s: [%d in total]\n\n", what[d->id], d->size);
                for (int i = 0; i < d->size; i += 1)
                    printf("\t#%d\t%s\n", 1 + i, d->argv[i]);
                printf("\n");
                return 0;
            };
            // ok: the array is NULL-terminated
            printf("\n%s [null-terminated]:\n\n", what[d->id]);
            int sz = 0;
            for (; d->argv[sz] != NULL; sz += 1)
                printf("\t#%d\t%s\n", 1 + sz, d->argv[sz]);
            printf("\n%d strings were found...\n\n", sz);
            break;
        default:
            break;
    }
    return 0;
}
int valid_id(Data* d) { return ((d->id > 0) && (d->id < 4)); };
arfneto
  • 1,227
  • 1
  • 6
  • 13
  • interesting approach, thanks for explaining! (why) do you think, this is a better approach than the one from @0___________ ? – azzurro Dec 17 '21 at 09:11
  • I did not said it is better or worse. I did not even had read the other until now. What I can say is that what I wrote is flexible: (a) it is easy to can change the implementation and add new message types. (b) It is easy to serialize, in order to transmit as a buffer or write to a file. (c) The 2 functions makes changing easier (d) in a real implementation you will need to prepare and copy the data (e) the data includes format so you do need to "make sure" anything – arfneto Dec 17 '21 at 12:48
  • and is the common way to write this, like in Pascal _variant records_ – arfneto Dec 17 '21 at 12:49