I am trying to split a char32_t
string into tokens separated by a delimiter. I am not using any strtok or other std library function because, it is gurrented that input string and the delimiter will be mulltibyte unicode string.
Here is the function I have written:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
#include <wchar.h>
char32_t **sp(char32_t *str, char32_t delim, int *len) {
*len = 1;
char32_t *s = str;
while (*s != U'\0') {
if (*s == delim) {
(*len)++;
}
s++;
}
char32_t **tokens = (char32_t **)malloc((*len) * sizeof(char32_t *));
if (tokens == NULL) {
exit(111);
}
char32_t * p = str;
int i = 0;
while (*p != U'\0') {
int tok_len = 0;
while (p[tok_len] != U'\0' && p[tok_len] != delim) {
tok_len++;
}
tokens[i] = (char32_t *)malloc(sizeof(char32_t) * (tok_len + 1));
if (tokens[i] == NULL) {
exit(112);
}
memcpy(tokens[i], p, tok_len * sizeof(char32_t));
tokens[i][tok_len] = U'\0';
p += tok_len + 1;
i++;
}
return tokens;
}
And here is the driver code
int main() {
char32_t *str = U"Hello,World,mango,hey,";
char32_t delim = U',';
int len = 0;
char32_t ** tokens = sp(str, delim, &len);
wprintf(L"len -> %d\n", len);
for (int i = 0; i < len; i++) {
if (tokens[i]) {
wprintf(L"[%d] %ls\n" , i , tokens[i]);
}
free(tokens[i]);
}
free(tokens);
}
Here is the output:
len -> 5
[0] Hello
[1] World
[2] mango
[3] hey
[4] (null)
But when I check the program with valgrind it show multiple memory errors
valgrind -s --leak-check=full --track-origins=yes ./x3
==7703== Memcheck, a memory error detector
==7703== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.
==7703== Using Valgrind-3.20.0 and LibVEX; rerun with -h for copyright info
==7703== Command: ./x3
==7703==
tok -> 5
tok -> 5
tok -> 5
tok -> 3
len -> 5
[0] Hello
[1] World
[2] mango
[3] hey
==7703== Conditional jump or move depends on uninitialised value(s)
==7703== at 0x48FDAF8: __wprintf_buffer (vfprintf-process-arg.c:396)
==7703== by 0x48FF421: __vfwprintf_internal (vfprintf-internal.c:1459)
==7703== by 0x490CFAE: wprintf (wprintf.c:32)
==7703== by 0x1093C9: main (main.c:51)
==7703== Uninitialised value was created by a heap allocation
==7703== at 0x4841888: malloc (vg_replace_malloc.c:393)
==7703== by 0x1091FC: sp (main.c:17)
==7703== by 0x109384: main (main.c:47)
==7703==
[4] (null)
==7703== Conditional jump or move depends on uninitialised value(s)
==7703== at 0x4844225: free (vg_replace_malloc.c:884)
==7703== by 0x1093DA: main (main.c:52)
==7703== Uninitialised value was created by a heap allocation
==7703== at 0x4841888: malloc (vg_replace_malloc.c:393)
==7703== by 0x1091FC: sp (main.c:17)
==7703== by 0x109384: main (main.c:47)
==7703==
==7703==
==7703== HEAP SUMMARY:
==7703== in use at exit: 0 bytes in 0 blocks
==7703== total heap usage: 7 allocs, 7 frees, 5,248 bytes allocated
==7703==
==7703== All heap blocks were freed -- no leaks are possible
==7703==
==7703== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
==7703==
==7703== 1 errors in context 1 of 2:
==7703== Conditional jump or move depends on uninitialised value(s)
==7703== at 0x4844225: free (vg_replace_malloc.c:884)
==7703== by 0x1093DA: main (main.c:52)
==7703== Uninitialised value was created by a heap allocation
==7703== at 0x4841888: malloc (vg_replace_malloc.c:393)
==7703== by 0x1091FC: sp (main.c:17)
==7703== by 0x109384: main (main.c:47)
==7703==
==7703==
==7703== 1 errors in context 2 of 2:
==7703== Conditional jump or move depends on uninitialised value(s)
==7703== at 0x48FDAF8: __wprintf_buffer (vfprintf-process-arg.c:396)
==7703== by 0x48FF421: __vfwprintf_internal (vfprintf-internal.c:1459)
==7703== by 0x490CFAE: wprintf (wprintf.c:32)
==7703== by 0x1093C9: main (main.c:51)
==7703== Uninitialised value was created by a heap allocation
==7703== at 0x4841888: malloc (vg_replace_malloc.c:393)
==7703== by 0x1091FC: sp (main.c:17)
==7703== by 0x109384: main (main.c:47)
==7703==
==7703== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
I am unable to figure out what is the problem. any help will be appreciated
I have also tried with unicode strings the same error also occurs.