1

I basically have a sentence in a string and want to break it down word per word. Every word should go into an array of strings. I am not allowed to use strtok. I have this code but it doesn't work. Can someone help?

There is surely something similar on the internet, but I couldn't find anything...

/*
 * Question's attempt: read a sentence and copy each word into its own row
 * of `array`. Posted as non-working; the review notes below point at the
 * reasons visible in this snippet.
 */
int main(){

    char s[10000];                        // sentence
    char array[100][100];                 // array where I put every word

    printf("Insert sentence: ");          // receive the sentence
    gets(s);                              // NOTE(review): gets() takes no buffer size, so a long line overruns s

    int i = 0;
    int j = 0;

    // NOTE(review): j advances once per character of s, yet it is also used
    // as the word (row) index of array, so it is not bounded by the 100 rows.
    for(j = 0; s[j] != '\0'; j++){        // loop until I reach the end
        // NOTE(review): i restarts at 0 on every pass, so the scan always
        // begins at s[0] again; the loop stops only on ' ', not on '\0'.
        for(i = 0; s[i] != ' '; i++){     // loop until the word is over
            array[j][i] = s[i];           // put every char in the array
        }
        // NOTE(review): no '\0' is stored after the copied characters.
    }

    return 0;
}
sg7
  • 6,108
  • 2
  • 32
  • 40

3 Answers

2

Every word should go into an array of strings. I am not allowed to use strtok.

Interesting problem which can be solved with a compact algorithm. It handles multiple spaces and punctuation marks; the separator characters are listed in the delimiters string and tested by is_delimiter(delimiters, c).

The most difficult part of the problem is to properly handle corner cases. We may have a situation where a word is longer than WORD_LEN or the number of words exceeds the capacity of the array.

Both cases are properly handled. The algorithm truncates the excessive words and parses only to the capacity of the array.

(BTW. Do not use gets: Why is the gets function so dangerous that it should not be used?)

Edit: The fully tested find_tokens function has been presented.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Capacity limits used by main().
// NOTE(review): both limits are 3 here, presumably so the truncation and
// overflow paths are easy to trigger; the trailing "100" looks like the
// intended production value -- confirm before reuse.
#define WORD_LEN            3 // 100 // MAX WORD LEN (characters per word; rows are declared WORD_LEN+1 for the terminator)
#define NR_OF_WORDS         3 // 100 // MAX NUMBER OF WORDS
#define INPUT_SIZE 10000      // input line capacity (the buffer is declared INPUT_SIZE+1)

/*
 * Tell whether character c belongs to the delimiter set.
 *
 * delimiters  null-terminated string listing every separator character
 * c           character to classify
 *
 * Returns 1 when c is found in delimiters, 0 otherwise.
 * Note: strchr() also matches the terminating null character, so
 * c == '\0' is reported as a delimiter (returns 1).
 */
int is_delimiter(const char * delimiters, char c) // check for a delimiter
{
    return (strchr(delimiters, c) != NULL) ? 1 : 0;
}

/*
 * Advance *i to the last character that has to be consumed before the next
 * token starts (or before the end of the string).
 *
 * i                in/out index into str; on entry it refers to the
 *                  character currently being examined by the caller
 * str              null-terminated input string
 * skip_delimiters  non-zero: str[*i] is a delimiter, skip the rest of the
 *                  delimiter run;
 *                  zero: str[*i] belongs to an over-long token, skip the
 *                  remaining token characters and the delimiter run after it
 * delimiters       null-terminated set of separator characters
 *
 * On return str[*i + 1] is either '\0' or the first character of the next
 * token, so a caller that increments the index lands on the right spot.
 * Returns 1 when another token follows, 0 when the string is exhausted.
 *
 * The index never moves onto the terminator itself: the previous version
 * could do that while skipping an over-long token at the very end of the
 * string and then read str[*i + 1] beyond the string.
 */
int skip(int *i, char *str, int skip_delimiters, const char *delimiters)
{
    /* Already at the terminator: nothing to skip and no str[*i + 1] to read. */
    if (str[*i] == '\0')
        return 0;

    while (str[*i + 1] != '\0') {
        /* Both characters tested here are non-null, so strchr() cannot
           match the terminator of the delimiter set by accident. */
        int next_is_delim = strchr(delimiters, str[*i + 1]) != NULL;

        if (skip_delimiters) {
            if (!next_is_delim)
                break;                /* next character starts a token */
        } else {
            int cur_is_delim = strchr(delimiters, str[*i]) != NULL;

            if (cur_is_delim && !next_is_delim)
                break;                /* end of the delimiter run after the excess */
        }
        (*i)++;
    }

    return str[*i + 1] != '\0';
}

/*
 * Split str into tokens separated by any run of delimiter characters.
 *
 * max_tokens    number of rows available in array
 * token_len     maximum number of characters stored per token; every row
 *               of array holds token_len + 1 chars (room for the '\0')
 * str           null-terminated input; it is only read
 * array         output: array[k] receives the k-th token, null-terminated
 * delimiters    null-terminated set of separator characters
 * nr_of_tokens  output: number of tokens stored in array
 *
 * Returns a bit mask:
 *   0x01  at least one stored token was longer than token_len and has
 *         been truncated
 *   0x02  the input holds more tokens than max_tokens; the extra tokens
 *         are not stored
 *   0     neither of the above
 *
 * A token whose length is exactly token_len is stored in full and is NOT
 * reported as truncated. Nothing is written to array when max_tokens <= 0.
 */
int find_tokens(int max_tokens, int token_len, char *str, char array[][token_len+1], const char *delimiters, int *nr_of_tokens)
{
    int status = 0;                           // all OK!
    int count = 0;                            // tokens stored so far
    int i = 0;                                // read position in str

    *nr_of_tokens = 0;

    for (;;) {
        // skip the delimiter run in front of the next token
        while (str[i] != '\0' && strchr(delimiters, str[i]) != NULL)
            i++;

        if (str[i] == '\0')
            break;                            // input exhausted

        // str[i] starts a token; make sure there is a row left for it
        if (count >= max_tokens) {
            status |= 0x02;                   // more tokens than expected
            break;
        }

        int len = 0;                          // characters stored in this row
        while (str[i] != '\0' && strchr(delimiters, str[i]) == NULL) {
            if (len < token_len)
                array[count][len++] = str[i]; // put char in the array
            else
                status |= 0x01;               // excess character dropped: token truncated
            i++;
        }
        array[count][len] = '\0';             // token termination
        count++;
    }

    *nr_of_tokens = count;
    return status;
}

/*
 * Demo driver: reads one line from stdin, splits it with find_tokens()
 * and prints the number of tokens, the status mask and every token.
 * Returns EXIT_FAILURE when no line could be read, 0 otherwise.
 */
int main(void){
    char input[INPUT_SIZE+1];                // sentence
    char array[NR_OF_WORDS][WORD_LEN+1];     // one row per word; the +1 leaves room for the null terminator

    int number_of_words;
    const char * delimiters =  " .,;:\t";    // word delimiters

    printf("Insert sentence: ");             // receive the sentence
    if (fgets(input, sizeof input, stdin) == NULL) {
        // EOF or read error: input was never filled in, so handing it to
        // find_tokens() would parse uninitialized memory.
        fprintf(stderr, "Failed to read input\n");
        return EXIT_FAILURE;
    }
    input[strcspn(input, "\n")] = '\0';      // remove '\n' (no-op when the line has none)

    int ret = find_tokens(NR_OF_WORDS, WORD_LEN, input, array, delimiters, &number_of_words);

    printf("tokens= %d ret= %d\n", number_of_words, ret);

    for (int i=0; i < number_of_words; i++)
        printf("%d: %s\n", i, array[i]);

    printf("End\n");
    return 0;
}

Test:

Insert sentence: ..........1234567,,,,,,abcdefgh....123::::::::::::                                                                          
tokens= 3 ret= 1                                                                                                                             
0: 123                                                                                                                                       
1: abc                                                                                                                                       
2: 123                                                                                                                                       
End
sg7
  • 6,108
  • 2
  • 32
  • 40
1

You are not '\0'-terminating the strings, and you are scanning the source from the beginning every time you've found a space character.

You only need one loop; drop the inner loop, and the loop condition must be s[i] != '\0':

int j = 0; // index for array
int k = 0; // index for array[j]
for(i = 0; s[i] != '\0'; ++i)
{
    if(k == 99)
    {
        // word longer than array[j] can hold, aborting
        array[j][99] = 0; // 0-terminating string
        break;
    }

    if(j == 99)
    {
        // more words than array can hold, aborting
        break;
    }

    if(s[i] == ' ')
    {
        array[j][k] = 0; // 0-terminating string
        j++; // for the next entry in array
        k = 0;
    } else
        array[j][k++] = s[i]; 
}

Note that this algorithm doesn't handle multiple spaces and punctuation marks. This can be solved by using a variable that stores the last state.

int j = 0; // index for array
int k = 0; // index for array[j]
int sep_state = 0; // 0 normal mode, 1 separation mode
for(i = 0; s[i] != '\0'; ++i)
{
    if(k == 99)
    {
        // word longer than array[j] can hold, aborting
        array[j][99] = 0; // 0-terminating string
        break;
    }

    if(j == 99)
    {
        // more words than array can hold, aborting
        break;
    }

    // check for usual word separators
    if(s[i] == ' ' || s[i] == '.' || s[i] == ',' || s[i] == ';' || s[i] == ':')
    {
        if(sep_state == 1)
            continue; // skip multiple separators
        array[j][k] = 0; // 0-terminating string
        j++; // for the next entry in array
        k = 0;
        sep_state = 1; // enter separation mode
    } else {
        array[j][k++] = s[i];
        sep_state = 0; // leave separation mode
    }
}

As you can see, by using the sep_state variable I'm able to check whether multiple separators come one after the other and skip the subsequent ones. I also check for common punctuation marks.

sg7
  • 6,108
  • 2
  • 32
  • 40
Pablo
  • 13,271
  • 4
  • 39
  • 59
0
#include <stdio.h>

/*
 * Read one line and split it on spaces into array, one word per row.
 * Every stored word is null-terminated. Words longer than 99 characters are
 * cut to 99, and at most 100 words are stored. Runs of spaces (including
 * leading and trailing ones) do not produce empty words.
 * Returns 1 when no line could be read, 0 otherwise.
 */
int main()
{
  char s[10000];                        // sentence
  char array[100][100];                 // array where i put every word

  printf("Insert sentence: ");          // receive the sentence
  if (fgets(s, sizeof s, stdin) == NULL)  // fgets bounds the read to the buffer; gets cannot
    return 1;

  int i = 0;                            // index of the word being filled
  int j = 0;                            // read position in s
  int k = 0;                            // write position inside array[i]

  // fgets keeps the newline; cut the line there so it is not stored in a word
  for (j = 0; s[j] != '\0'; j++) {
    if (s[j] == '\n') {
      s[j] = '\0';
      break;
    }
  }
  printf("%s",s);

  for (j = 0; s[j] != '\0' && i < 100; j++) {  // loop until i reach the end or run out of rows
    if (s[j] != ' ') {
      if (k < 99)                       // keep one char free for the terminator
        array[i][k++] = s[j];
    }
    else if (k > 0) {                   // a space right after a word closes that word
      array[i][k] = '\0';
      i++;
      k = 0;
    }
  }

  // close the word that was still open when the input ended
  if (i < 100 && k > 0) {
    array[i][k] = '\0';
    i++;
  }
  // i now holds the number of words stored in array

  return 0;
}

please note that the gets function is very unsafe and shouldn't in any case be used, use scanf or fgets instead

Alessandro Messori
  • 1,035
  • 1
  • 14
  • 23
  • The check `s[j] == '\0'` is unnecessary, the break condition of the loop is `s[j] != '\0'`, so `s[j]`will never be `'\0'` in the loop – Pablo Mar 23 '18 at 22:52
  • Unfortunately, the algorithm does not work for reason stated by @Pablo. Also 1) it allows for buffer overflows: length of the words are not checked 2) number of words can exceed the array capacity 3) It does not handle multiple separators. 4) It cannot handle multiple spaces or punctuation marks. – sg7 Mar 24 '18 at 02:05