I have written this C code to take a String from stdin, tokenize it, and then print each token:
#include <stdio.h>
#include <stdlib.h>
void tokenize(char *buffer, char** tokens, int n) {
int tokens_index = 0;
int curr_index = 0;
for(int i = 0; i < n; i++) {
if(buffer[i] != ' ' && buffer[i] != '\0') {
tokens[tokens_index][curr_index] = buffer[i];
curr_index++;
}
else if(i != 0 && buffer[i] == ' ' && buffer[i-1] == ' ') {
continue;
}
else if(buffer[i] == ' ') {
curr_index = 0;
tokens_index++;
}
else if(buffer[i] == '\0') {
tokens_index++;
tokens[tokens_index][0] == '\0';
break;
}
}
}
int main(int argc, char **argv) {
char buffer[50];
while(fgets(buffer, 50, stdin)) {
char **tokens = (char **) malloc(sizeof(char *) * 20);
for(int i = 0; i < 20; i++) {
tokens[i] = (char *) malloc(sizeof(char) * 50);
}
tokenize(buffer, tokens, 50);
for(int i = 0; i < 20; i++) {
if(*tokens[i] == '\0') {
break;
}
printf("%s\n", tokens[i]);
}
for(int i = 0; i < 20; i++) {
free(tokens[i]);
}
free(tokens);
}
return 0;
}
Input:
ab cd ef
gh ij kl
The first time the loop runs, it is able to successfully tokenize the user's input and print it to stdout. However, if I run the loop additional times the tokens are outputed, but garbage is outputted as well.
Output:
ab
cd
ef
gh
ij
kl
��
��
`�
If I remove the free from inside the nested for loop however, it ends up working perfectly.
I don't understand why it is doing this. Shouldn't my program be able to just allocate new memory for each token after I free the char * in the previous iteration? What am I missing?