7

Is there any way to do this with the strtok function? Or are there any other suggestions?

Example :

Insert "hello world" to dbms

Result:

Insert
"hello world"
to
dbms
pts
  • 80,836
  • 20
  • 110
  • 183
Melih Altıntaş
  • 2,495
  • 1
  • 22
  • 35

6 Answers6

6

strtok or any other function in the standard C library can't do this for you. To get it, you have to write code for it yourself, or you have to find some existing code in some external library.

pts
  • 80,836
  • 20
  • 110
  • 183
5

This function takes delimiting, openblock and closeblock characters. Delimiting characters are ignored within the block and closing block characters must match the opening block characters. The example splits on space and blocks are defined by quote and brackets, braces and <>. Thanks to Jongware for comments!

#include<stdlib.h>
#include<stdio.h>
#include<string.h>

/*
 * strmbtok - strtok-like tokenizer that does not split inside "blocks".
 *
 * input      String to tokenize on the first call (modified in place: each
 *            delimiter that ends a token is overwritten with '\0').  Pass
 *            NULL on later calls to continue with the same string.
 * delimit    Set of characters that separate tokens outside of a block.
 * openblock  Set of characters that start a block.
 * closeblock Characters that end a block; closeblock[i] closes the block
 *            opened by openblock[i], so it must be at least as long as
 *            openblock.
 *
 * Returns a pointer to the next token, or NULL when the string is exhausted
 * or when called with input == NULL before any string has been supplied.
 *
 * Notes:
 *  - Blocks do not nest: once inside a block, only its matching closing
 *    character is looked for.  An unterminated block runs to end of string.
 *  - Unlike strtok, consecutive delimiters are not collapsed; each one ends
 *    a (possibly empty) token.  A first call with an empty string returns
 *    that empty string rather than NULL.
 *  - The scan position is kept in a static variable, so only one string can
 *    be tokenized at a time and the function is not reentrant.
 */
char *strmbtok ( char *input, char *delimit, char *openblock, char *closeblock) {
    static char *token = NULL;  /* scan position carried across calls */
    char *lead = NULL;          /* start of the token being returned */
    char *block = NULL;         /* where *token was found in openblock */
    int iBlock = 0;             /* non-zero while inside a block */
    int iBlockIndex = 0;        /* index of the opener, selects the closer */

    if ( input != NULL) {
        token = input;
        lead = input;
    }
    else {
        /* Nothing to continue: either no string was ever supplied (the
           static pointer is still NULL) or the previous one is used up. */
        if ( token == NULL || *token == '\0') {
            return NULL;
        }
        lead = token;
    }

    while ( *token != '\0') {
        if ( iBlock) {
            /* inside a block only the matching closer is significant */
            if ( closeblock[iBlockIndex] == *token) {
                iBlock = 0;
            }
            token++;
            continue;
        }
        if ( ( block = strchr ( openblock, *token)) != NULL) {
            iBlock = 1;
            iBlockIndex = (int) ( block - openblock);
            token++;
            continue;
        }
        if ( strchr ( delimit, *token) != NULL) {
            /* terminate the token and park the scan just past the delimiter */
            *token = '\0';
            token++;
            break;
        }
        token++;
    }
    return lead;
}

/*
 * Demo driver: tokenizes a sample sentence on spaces while keeping quoted,
 * bracketed, angle-bracketed and braced runs together, and prints one token
 * per line.
 */
int main (int argc , char *argv[]) {
    /* openers[i] is closed by closers[i] */
    char openers[] = "\"[<{";
    char closers[] = "\"]>}";
    char sentence[] = "this contains blocks \"a [quoted block\" and a [bracketed \"block] and <other ]\" blocks>";
    char *piece = strmbtok ( sentence, " ", openers, closers);

    /* the first call is given a non-NULL string, so it always yields a token */
    do {
        printf ( "%s\n", piece);
        piece = strmbtok ( NULL, " ", openers, closers);
    } while ( piece != NULL);

    return 0;
}

output
this
contains
blocks
"a [quoted block"
and
a
[bracketed "block]
and
<other ]" blocks>

user3121023
  • 8,181
  • 5
  • 18
  • 16
  • Very interesting approach, but it fails when quotes appear inside a block. Perhaps you could define pairs of "open, close" characters instead? – Jongware Oct 05 '14 at 11:34
3

No luck using strtok().

Fun opportunity to employ a state machine.

#include <stdio.h>

/*
 * Write the characters in the half-open range [frm, to) to stdout, wrapped
 * in '<' and '>' and followed by a newline.  The angle brackets are there
 * only to make token boundaries visible in the output.
 */
void printstring(const char *frm, const char *to) {
  const char *cursor;

  putc('<', stdout);
  for (cursor = frm; cursor < to; ++cursor) {
    putc(*cursor, stdout);
  }
  putc('>', stdout);
  putc('\n', stdout);
}

/*
 * Split s into tokens separated by spaces, treating spaces between a pair
 * of double quotes as part of the token, and hand each token to
 * printstring().  The input is not modified.
 *
 * Quotes are kept in the emitted token.  A quote met in the middle of a word
 * starts a quoted stretch that stays in the same token, and an unterminated
 * quote extends the token to the end of the string.  Only ' ' separates
 * tokens.
 */
void split_space_not_quote(const char *s) {
  enum { BETWEEN_TOKENS, IN_WORD, IN_QUOTE } mode = BETWEEN_TOKENS;
  const char *token_begin = NULL;  /* set whenever a token starts */
  const char *p;

  for (p = s; *p != '\0'; ++p) {
    const int at_quote = (*p == '\"');
    const int at_space = (*p == ' ');

    if (mode == BETWEEN_TOKENS) {
      if (at_space) {
        continue;  /* still skipping separators */
      }
      token_begin = p;
      mode = at_quote ? IN_QUOTE : IN_WORD;
    } else if (mode == IN_WORD) {
      if (at_space) {
        printstring(token_begin, p);
        mode = BETWEEN_TOKENS;
      } else if (at_quote) {
        mode = IN_QUOTE;
      }
    } else {
      /* IN_QUOTE: everything up to the closing quote belongs to the token */
      if (at_quote) {
        mode = IN_WORD;
      }
    }
  }

  /* flush a token that runs up to the end of the string */
  if (mode != BETWEEN_TOKENS) {
    printstring(token_begin, p);
  }
}

/* Demo driver: runs the splitter on the example sentence from the question. */
int main(void) {
  static const char sample[] = "Insert \"hello world\" to dbms";

  split_space_not_quote(sample);
  return 0;
}

<Insert>
<"hello world">
<to>
<dbms>
chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
  • +1; For added fun, you can easily add the Javascript-like "whatever comes first" rule for `'` as well. – Jongware Oct 03 '14 at 22:34
  • I need something like this, except that it should split on commas when they are not inside quotes. –  Sep 19 '16 at 11:29
1

Maybe you can use a regular expression (see "Regular expressions in C: examples?").

Here is an example of a regex you can use: /([\w]+)|(\"[\w\ ]+\")/gi

To train yourself with regex you should also use: http://regex101.com/

Community
  • 1
  • 1
Xavier S.
  • 1,147
  • 13
  • 33
0

You could do a first pass where strtok splits the string using the quote character as your delimiter. Then do a second pass with the space character as the delimiter on the resulting strings that were non-quoted.

Edited to add working source code:

// Two-level split: the outer strtok cuts the string at double quotes, and
// every other resulting piece (the unquoted ones) is cut again at spaces.
// Fragment only: it presumably lives inside a function where stringToSplit
// is a writable char buffer and bool/true/false are available -- confirm.
//
// quotStr says whether the piece currently in currQuot was inside quotes.
// strtok skips leading delimiters, so when the input starts with a quote the
// first piece returned is the quoted text itself.
bool quotStr = (*stringToSplit == '\"');
char* currQuot = strtok(stringToSplit, "\"");
char* next = NULL;

while(currQuot)
{
    if(quotStr)
    {
        // quoted piece: print it whole, with the quotes strtok removed put back
        printf("\"%s\"\n", currQuot);
        quotStr = false;
    }
    else
    {
        // remember where the outer loop strtok left off
        // (next is always NULL here, so this continues the outer scan; with
        // no usable delimiter the whole remainder comes back as one token,
        // or NULL if nothing is left)
        next = strtok(next, "\0");

        // subdivide
        // (this restarts strtok on currQuot and so discards the outer scan
        // position, which is why it was saved in next above)
        char* currWord = strtok(currQuot, " ");
        while(currWord)
        {
            printf("%s\n", currWord);
            currWord = strtok(NULL, " ");
        }
        quotStr = true;
    }

    // After an unquoted piece, restart the quote scan at the saved remainder;
    // if there was none, next is NULL and strtok continues the exhausted
    // inner scan, which ends the loop.  After a quoted piece, next is NULL
    // and this simply continues the outer scan.
    // NOTE(review): strtok skips runs of delimiters, so an empty quoted
    // string ("") yields no piece and the quoted/unquoted alternation goes
    // out of step.
    currQuot = strtok(next, "\"");
    next = NULL;
}

I believe this will still fail in the case of empty quoted strings, though...

iwolf
  • 1,080
  • 1
  • 7
  • 10
  • `strtok` removes the split-on character, so you would lose the information of which strings to split on spaces. – Jongware Oct 03 '14 at 23:44
  • If the first character of the given string is a delimiter character, it doesn't get destroyed. You will split "every other" resulting string on spaces with the existence of a leading quote on your first substring as the indicator of whether you're further splitting the odd or even substrings – iwolf Oct 03 '14 at 23:51
0

My solution with strtok(). It only groups words that start with Space-Quotes and end with Quotes-Space

/*
 * Split argstring on spaces into a temporary argv-style array, re-joining
 * the words of a double-quoted group into a single element.
 *
 * argstring  Writable string; strtok overwrites its separating spaces with
 *            '\0'.  NULL is accepted and ignored.
 *
 * A group starts at a word whose first character is '"' and ends at the
 * next word whose last character is '"'; a single word that both starts and
 * ends with '"' is a complete group on its own.  The quotes are kept, and
 * the words of a group are re-joined with exactly one space, because strtok
 * collapses runs of spaces.
 *
 * The array is private to this function and is released before returning;
 * nothing is returned to the caller.  If an allocation fails, splitting
 * stops early and everything allocated so far is still released.
 */
void split(char *argstring)
{
    size_t count = 0;      /* number of elements stored in args */
    char **args = NULL;    /* grown by one slot per new element */
    char *token;
    int in_quotes = 0;     /* non-zero while a quoted group is still open */

    if (argstring == NULL)
    {
        /* strtok(NULL, ...) would resume some earlier, unrelated scan */
        return;
    }

    for (token = strtok(argstring, " "); token != NULL; token = strtok(NULL, " "))
    {
        /* strtok never returns an empty token, so token_len >= 1 */
        size_t token_len = strlen(token);
        int ends_with_quote = ('\"' == token[token_len - 1]);

        if (in_quotes)
        {
            /* Append " " + token to the element that opened the group */
            size_t prev_len = strlen(args[count - 1]);
            char *grown = realloc(args[count - 1], prev_len + token_len + 2);

            if (grown == NULL)
            {
                break;  /* args[count - 1] is still valid and freed below */
            }
            grown[prev_len] = ' ';
            memcpy(grown + prev_len + 1, token, token_len + 1);
            args[count - 1] = grown;

            if (ends_with_quote)
            {
                in_quotes = 0;
            }
        }
        else
        {
            char **grown = realloc(args, (count + 1) * sizeof *grown);
            char *copy;

            if (grown == NULL)
            {
                break;
            }
            args = grown;

            copy = malloc(token_len + 1);
            if (copy == NULL)
            {
                break;
            }
            memcpy(copy, token, token_len + 1);
            args[count++] = copy;

            /* Open a group only if this word does not also close it; a lone
               '"' is an opening quote, not an open-and-close pair. */
            if ('\"' == token[0] && (token_len == 1 || !ends_with_quote))
            {
                in_quotes = 1;
            }
        }
    }

    /* Release exactly the elements that were stored, then the array itself */
    while (count > 0)
    {
        free(args[--count]);
    }
    free(args);
}
mfloris
  • 361
  • 4
  • 16