1

I am trying to write a program that will dynamically allocate enough space to store all the words in a 1D char array separated by a space. ex:

char *literal = "The quick brown fox";
char **words = { "The", "quick", "brown", "fox" };

The program I wrote keeps segfaulting when trying to strncpy(str[buff_ptr],tok,strlen(tok));

I will post my code bellow:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *mutableString(char *lit) {
    int size = strlen(lit);
    char *str = (char *)malloc(sizeof(char) * size);
    strncpy(str, lit, size + 1);
    return str;
}

int numTokens(char *str, const char *DELIM) {
    char* clone = (char*)malloc(sizeof(char*));
    strncpy(clone, str, strlen(str) + 1);
    int count = 0;
    for (char *tok = strtok(clone, " "); tok != NULL; tok = strtok(NULL, " "))
        count++;
    free(clone);
    return count;
}

char **tokenize(char *str, const char *DELIM) {
    printf("tokenize-------------------------\n");
    int size = numTokens(str, DELIM);
    //allocate space on heap for buffer
    char **buff = (char **)malloc(size * sizeof(char *));
    //get first word
    char *tok = strtok(str, DELIM);
    int buff_ptr = 0;
    while (tok != NULL) {
        strncpy(buff[buff_ptr], tok, strlen(tok) + 1);
        printf("buff[%d]%s\n", buff_ptr, buff[buff_ptr]);
        //increment to next word for storage
        buff_ptr++;
        //find next word in string
        tok = strtok(NULL, DELIM);
    }
    for (int i = 0; i < size; i++) {
        printf("%s\n", buff[i]);
    }
    //return 2D pointer
    return buff;
}

int main() {
    char *literal = "some literal string.";
    //convert string to mutable string for strtok
    char *str = mutableString(literal);
    //set 2D pointer equal to the pointer address returned
    char **no_spaces_str = tokenize(str, " ");
    printf("%s\n", str);
    for (int i = 0; i < numTokens(str, " "); i++) {
        printf("%s\n", no_spaces_str[i]);
    }
    //free heap allocated memory
    free(str);
    free(no_spaces_str);
    return 0;
}

Please see attachment of lldb stack variables:

lldb output

chqrlie
  • 131,814
  • 10
  • 121
  • 189
Kyle C
  • 43
  • 4
  • 1
    `strlen` gives number of `char`s, you need to allocate `+1` size to cater null termination `\0`. – TruthSeeker Dec 27 '21 at 18:42
  • That makes sense @TruthSeeker, but why would it segfault, wouldn't it read garbage value up to the nullterminator? – Kyle C Dec 27 '21 at 18:44
  • @KyleC. When you write a NUL terminator into memory you don't own, you can segfault. When you read memory that's not NUL terminated, you may hit memory you aren't allowed to read before you get NUL – Mad Physicist Dec 27 '21 at 18:50
  • `strncpy(buff[buff_ptr],tok,strlen(tok));` line is causing the `seg-fault` as you have not allocated any memory before copying the word to it – TruthSeeker Dec 27 '21 at 18:50
  • If you can afford it, just add NULs to your existing string and make a buffer of pointers to the segments. Make sure you only free the first one though. – Mad Physicist Dec 27 '21 at 18:51
  • @TruthSeeker I allocated memory to buffer here ```c //allocate space on heap for buffer char **buff = (char**)malloc(size*sizeof(char*)); ``` – Kyle C Dec 27 '21 at 19:02

4 Answers4

0

Within the function mutableString there is dynamically allocated the character array str that does not contain a string

char* mutableString(char* lit){
  int size = strlen(lit);
  char* str = (char*)malloc(sizeof(char)*size);
  strncpy(str,lit,size);
  return str;
}

So other functions invoke undefined behavior as for example in this for loop

int numTokens(char* str, const char* DELIM){
  int count = 0;
  for(; *str != '\0'; str++)
  //...

Moreover if the array contained a string nevertheless the function numTokens is incorrect because for example it returns 0 when a passed string contains only one word.

Also in the function tokenize

strncpy(buff[buff_ptr],tok,strlen(tok));

there are used uninitialized pointers buff[buff_ptr] allocated like.

char **buff = (char**)malloc(size*sizeof(char*));

And again you are trying to copy strings without including the terminating zero character '\0; using eth functions strncpy.

So this call in main

printf("%s\n",no_spaces_str[i]);

also will invoke undefined behavior.

Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
  • how does mutableString not contain a string? I am copying a literal string into allocated memory to make it mutable. Yes I understand numTokens is not a sufficent way to find words but in this case I am forcing it to be correct by adding a space to the end of my literal string. My main question is how to copy tokens from a 1D char array to a 2D char array using dynamically allocated memory? Can you explain what you mean by there are "uninitialized pointers buff[buff_ptr] allocated like"? Thank you – Kyle C Dec 27 '21 at 18:58
  • @KyleC A string is a sequence of characters including the terminating zero character '\0'. And you are not including it when are making a copy. So this condition in the for loop *str != '\0' invokes undefined behavior. – Vlad from Moscow Dec 27 '21 at 19:01
  • Ahh, so even know the literal string already has a null terminator when you strncpy, strlen is not counting the nullterminator, so I need to explicitly +1 to my size? – Kyle C Dec 27 '21 at 19:04
  • @KyleC Yes you need to add to the length one more byte and use either memcpy or strcpy. – Vlad from Moscow Dec 27 '21 at 19:05
  • Thank you for your advise I made the necessary changes to add null terminators to my char arrays, I updated my numTokens to count all tokens, I am still seg faulting though. I made an update to show my edits. Please let me know if you have any other advise, thanks. – Kyle C Dec 27 '21 at 19:16
  • @KyleC Reread mu answer one more. – Vlad from Moscow Dec 27 '21 at 19:18
  • Hmmm im still stuck on what you mean by "used unitialized pointers buff[buff_ptr] allocated like". My understanding is I am creating a bufferspace of size * sizeof(char*) when I malloc to buffer. So since size = 3, I will have a buffer array of 3 char* pointers. Then when strncpy(buff[buff_ptr],tok,strlen(tok)+1) I will copy the value at tok, up to the strlen + 1 including the null terminator, into buff[0] - buff[2]. Im not sure why its segfaulting here. – Kyle C Dec 27 '21 at 19:27
  • @KyleC Where do the pointers point? – Vlad from Moscow Dec 27 '21 at 19:28
  • To a memory space in the Heap? – Kyle C Dec 27 '21 at 19:29
  • @KyleC They have indeterminate values after calling char **buff = (char**)malloc(size*sizeof(char*)); The only initialized pointer is buff that points to the allocated memory of pointers. – Vlad from Moscow Dec 27 '21 at 19:32
  • If I add the line ```buff[buff_ptr] = (char*)malloc(sizeof(char)*10)``` in while loop of tokenize, I will be giving each ptr space in memory, This is confusing to me because this will cause fragmentation in the heap, is there not a better way of allocated memory for an array of strings? (array of char arrays) – Kyle C Dec 27 '21 at 19:36
  • Also I tried this line and got a segfault still. – Kyle C Dec 27 '21 at 19:37
  • @KyleC I am sorry but I described all problems with your code. If you have some more problems then ask a new question. – Vlad from Moscow Dec 27 '21 at 19:41
  • Ok thank you for you expertise, that helped alot. – Kyle C Dec 27 '21 at 20:07
0

Apart from the @Vlad from Moscow mentioned points,

malloc return values must not be type-casted Do I cast the result of malloc?

I tried to clean up the code find the snippet below, DEMO

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct{
    char** buff;
    int    size;
}Array_2d;

char* mutableString(const char* lit){
  int size = strlen(lit);
  char* str = malloc(size);
  strncpy(str,lit,size+1);
  return str;
}

int getNextWordLength(const char* str){
  int index = 0;
  while(*str && (*str != ' ')){
      //printf("%c",*str);
      ++index;
      ++str;
  }
  return index;
}

int numTokens(const char* str){
  int count = 0;
  for(; *str != '\0'; str++)
  {
    if(*str == ' ')
      count++;
  }
  return count;
}

void tokenize(const char* str, const char *DELIM, Array_2d *array){

  int len = strlen(str)+1;

    if(!str && !len){
        array->buff = 0;
        array->size = 0;
    }

    int number_of_words = numTokens(str)+1;
    //allocate space on heap for buffer
    char **buff = (char**)malloc(number_of_words*sizeof(char*));

    int index = 0; 

    do{
        //get first word
        
        int word_length = getNextWordLength(str);
        
        //To compensate null terminal
        buff[index] = malloc(word_length+1);

        strncpy(buff[index], str,word_length);

        buff[index][word_length+1] = '\0';

        str += word_length+1;
        ++index;

    }while(index < number_of_words);

    //update return value
    array->buff = buff;
    array->size = number_of_words;

}

int main(){
    char* literal = "hello world this is test";
    //convert string to mutatable string for strtok
    char* str = mutableString(literal);
    printf("Complete String is : %s\n",str);
    

    Array_2d array;
    // set 2D pointer equal to the pointer addres returned
     tokenize(str, " ",&array);

    printf("Tokenized String\n");
    for(int i=0;i<array.size;i++){
        printf("%s\n",array.buff[i]);
    }

    free(str);

    for(int i =0;i< array.size; ++i)
    free(array.buff[i]);

    free(array.buff);

  return 0;
}
TruthSeeker
  • 1,539
  • 11
  • 24
  • Thank @TruthSeeker looks like you went out of your way to almost completely rewrite my code. My main issue was understanding the dynamic memory allocation, I think I got it now, but nice struct solution I didnt even consider this – Kyle C Dec 27 '21 at 20:32
  • Also its interesting you mentioned to not cast. My understanding of malloc returns a void* pointer so specifying what type of ptr you are returning tells the compiler what value type will be dereferenced at that address. I know the newer version of C compiler does this for you but just like return 0 is un-necessary its still good practice to include for clarity I thought. – Kyle C Dec 27 '21 at 20:37
0

This is the corrected version of the code above

  1. When you copying string you should add 1 char for '\0'

    int size = strlen(lit)+1;

  2. Tokens buffer size should be size+1

    int size = numTokens(str, DELIM)+1;

  3. Strncpy is not required strncpy(buff[buff_ptr], tok, strlen(tok) + 1); you already copied string char* str = mutableString(literal); just point to n-th buffer every next token buff[buff_ptr]=tok;

  4. for (int i = 0; i<numTokens(str, " "); i++){
        printf("%s\n", no_spaces_str[i]);
    }
    

    This code wouldn't work correctly. strtok manipulates the string you pass in and returns a pointer to it, so no memory is allocated. so all spaces will be replaced by '\0'

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma warning(push)
#pragma warning(disable : 4996)

char* mutableString(char* lit){
    int size = strlen(lit)+1;
    char* str = (char*)malloc(sizeof(char)*size);
    strncpy(str, lit, size);
    return str;
}
int numTokens(char* str, const char* DELIM){
    int count = 0;
    for (; *str != '\0'; str++)
    {
        if (*str == ' ')
            count++;
    }
    return count;
}
char** tokenize(char* str, const char* DELIM){
    printf("tokenize-------------------------\n");
    int size = numTokens(str, DELIM)+1;
    //allocate space on heap for buffer
    char **buff = (char**)malloc((size)*sizeof(char*));
    //get first word
    char* tok = strtok(str, DELIM);
    int buff_ptr = 0;
    while (tok != NULL){

        buff[buff_ptr]=tok;
        printf("buff[%d]%s\n", buff_ptr, buff[buff_ptr]);
        //increment to next word for storage
        buff_ptr++;
        //find next word in string
        tok = strtok(NULL, DELIM);
    }

    for (int i = 0; i<size; i++){
        printf("%s\n", buff[i]);
    }
    //return 2D pointer
    return buff;
}
int main(){
    char* literal = "some literal string.";
    //convert string to mutatable string for strtok
    char* str = mutableString(literal);
    //set 2D pointer equal to the pointer addres returned
    char** no_spaces_str = tokenize(str, " ");
    printf("%s\n", str);

    for (int i = 0; i<numTokens(str, " "); i++){
        printf("%s\n", no_spaces_str[i]);
    }
    //free heap allocated memory
    free(str);

    free(no_spaces_str);
    return 0;
}

results

tokenize-------------------------
buff[0]some
buff[1]literal
buff[2]string.
some
literal
string.
some
arutar
  • 1,015
  • 3
  • 9
0
char* mutableString(char* lit){
  int size = strlen(lit)+1;
  char* str = (char*)malloc(sizeof(char)*size);
  strncpy(str,lit,size);
  return str;
}
int numTokens(char* str, const char* DELIM){
  int size = strlen(str)+1;
  char* clone = (char*)malloc(sizeof(char)*size);
  strncpy(clone,str,size);
  int count = 0;
  for(char* tok = strtok(clone," "); tok != NULL; tok=strtok(NULL, " "))
      count++;
  free(clone);
  return count;
}
char** tokenize(char* str, const char* DELIM){
  int size = strlen(str)+1;
  char* clone = (char*)malloc(sizeof(char)*size);
  strncpy(clone,str,size);
  // printf("tokenize-------------------------\n");
  int size = numTokens(str, DELIM);
  //allocate space on heap for buffer
  char **buff = (char**)calloc(size,sizeof(char*));
  //get first word
  char* tok = strtok(clone,DELIM);
  int buff_ptr = 0;
  while(tok != NULL){
    // printf("token%d:%s\n",buff_ptr,tok);
    buff[buff_ptr] = (char*)malloc(sizeof(char)*strlen(tok)+1);
    strncpy(buff[buff_ptr],tok,strlen(tok)+1);
    //increment to next word for storage
    buff_ptr++;
    //find next word in string
    tok = strtok(NULL, DELIM);
  }
  //return 2D pointer
  free(clone);
  return buff;
}
int main(){
  char* literal = "some literal string.";
  //convert string to mutatable string for strtok
  char* str = mutableString(literal);
  //set 2D pointer equal to the pointer addres returned
  char** no_spaces_str = tokenize(str, " ");
  int num_words = numTokens(str," ");
  char* oneD = (char*)calloc(strlen(str)+1,sizeof(char));
  for(int i = 0;i<num_words;i++){
    strncat(oneD,no_spaces_str[i],strlen(no_spaces_str[i])+1);
    printf("%s\n",oneD);
  }
  
  //free heap allocated memory
  free(str);
  free(no_spaces_str);
  free(oneD);
  return 0;
}

Is solution to my problem. Thanks to all those who commented and helped me understand dynamic memory better.

Kyle C
  • 43
  • 4
  • Your answer could be improved with additional supporting information. Please [edit] to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Dec 27 '21 at 22:31
  • your program will crash again – arutar Dec 27 '21 at 22:33
  • You solution has problem with writing to unallocated memory. `char* str = (char*)malloc(sizeof(char)*size);` here allocated memory is of size 1 less than copied `strncpy(str,lit,size+1)` – TruthSeeker Dec 28 '21 at 06:41
  • Thank you for pointing that out. – Kyle C Dec 28 '21 at 16:17