-1

I'm writing a function to split a string into a pointer to pointer, If separator is space, I want to split only the words that are not inside quotes. e.g Hello world "not split" should return

Hello
world
"not split"

somehow the function split the words inside the quotes and doesn't split words outside the quotes.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int is_quotes(char *s)
{
    int i;
    int count;

    i = 0;
    count = 0;
    while (s[i])
    {
        if (s[i] == '"')
            count++;
        i++;
    }
    if (count == 0)
        count = 1;
    return (count % 2);
}

int count_words(char *s, char sep)
{
    int check;
    int i;
    int count;

    check = 0;
    if (sep == ' ')
      check = 1;
    i = 0;
    count = 0;
    while (*s && *s == sep)
        ++s;
    if (*s)
        count = 1;
    while (s[i])
    {
        if (s[i] == sep)
        {
          if (!is_quotes(s + i) && check)
          {
            i += 2;
            while (s[i] != 34 && s[i])
                i++;
          }
          count++;
        }
        i++;
    }
    return (count);
}

char    *ft_strsub(char const *s, unsigned int start, size_t len)
{
    char    *sub;

    sub = malloc(len + 1);
    if (sub)
        memcpy(sub, s + start, len);
    return (sub);
}

char        **ft_strsplit(char const *s, char c)
{
    int     words;
    char    *start;
    char    **result;
    int     i;

    words = count_words((char *)s, c);
    if (!s || !c || words == 0)
        return (NULL);
    i = 0;
    result = (char **)malloc(sizeof(char *) * (words + 1));
    start = (char *)s;
    while (s[i])
    {
        if (s[i] == c)
        {
            if (is_quotes((char *)s + i) == 0 && c == ' ')
            {
                i += 2;
                while (s[i] != '"' && s[i])
                    i++;
                i -= 1;
            }
            if (start != (s + i))
                *(result++) = ft_strsub(start, 0, (s + i) - start);
            start = (char *)(s + i) + 1;
        }
        ++i;
    }
    if (start != (s + i))
        *(result++) = ft_strsub(start, 0, (s + i) - start);
    *result = NULL;
    return (result - words);
}

int main(int argc, char **argv)
{
    if (argc > 1)
    {
        char **s;
        s = ft_strsplit(argv[1], ' ');
        int i = 0;
        while (s[i])
            printf("%s\n", s[i++]);
    }
  return 0;
}

When I run this code with hello world "hello hello" I get the following

hello world
"hello
hello"
Junius L
  • 15,881
  • 6
  • 52
  • 96
  • @Olaf sorry I meant pointer to pointer `**`. – Junius L Dec 24 '16 at 19:50
  • Use a debugger to step through your code. – kaylum Dec 24 '16 at 20:12
  • The code has no `main` function. – Weather Vane Dec 24 '16 at 20:21
  • 1
    We shouldn't have to work out how `count_words()` works — you should show us the relevant code. It would be sensible to show the `main()` function too; it shouldn't be big and would make it into an MCVE ([MCVE]). In `ft_strsplit()` you have: `words = count_words((char *)s, c); if (!s || !c || words == 0) return (NULL);` — does `count_words()` take care of `s == 0` or `c == 0` cases? Head off the impossible as soon as possible. Consider adding an `assert(s != 0 && c != 0);` assertion before the runtime check. – Jonathan Leffler Dec 24 '16 at 20:24
  • Sorry I forgot to include `count_words()` and `main()`. – Junius L Dec 24 '16 at 20:36

2 Answers2

2

You need a state machine with two states, on quote and off quote. When you hit a quote, flip the state. When you hit a space, convert to a newline if off quote, not if on quote. (You will quickly want to make it more elaborate to allow string escapes etc, the state machine approach scales up to that).

Malcolm McLean
  • 6,258
  • 1
  • 17
  • 18
0

try this (fix and reduce)

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

typedef struct token {
    const char *top;
    const char *end;//point to next character
} Token;

Token getToken(const char **sp, char sep){
    const char *s = *sp;
    const char *top, *end;
    Token token = { NULL, NULL};

    while(*s && *s == sep)//skip top separators
        ++s;
    if(!*s){
        *sp = s;
        return token;//return null token
    }
    token.top = s;
    while(*s && *s != sep){
        if(*s == '"'){
            char *p = strchr(s + 1, '"');//search end '"'
            if(p)
                s = p;//skip to '"'
        }
        ++s;
    }
    token.end = s;
    *sp = s;

    return token;
}

int count_words(const char *s, char sep){
    int count = 0;
    Token token = getToken(&s, sep);

    while(token.top != NULL){
        ++count;
        token = getToken(&s, sep);
    }
    return count;
}

char *ft_strsub(Token token){
    size_t len = token.end - token.top;
    char *sub = malloc(len + 1);
    if (sub){
        memcpy(sub, token.top, len);
        sub[len] = 0;
    }
    return sub;
}

char **ft_strsplit(const char *s, char sep){
    int words;

    if (!s || !sep || !(words = count_words(s, sep)))
        return NULL;

    char **result = malloc(sizeof(char *) * (words + 1));
    if(!result){
        perror("malloc");
        return NULL;
    }

    int i = 0;
    Token token = getToken(&s, sep);

    while(token.top != NULL){
        result[i++] = ft_strsub(token);
        token = getToken(&s, sep);
    }
    result[i] = NULL;

    return result;
}

int main(int argc, char **argv){
    const char *text = "Hello world \"not split\"";
    char **s = ft_strsplit(text, ' ');
    int i = 0;
    while (s[i]){
        printf("%s\n", s[i]);
        free(s[i++]);
    }
    free(s);

    return 0;
}

Escape character processing version.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define ESCAPE '\\' //ESCAPE CHARACTER

typedef struct token {
    const char *top;
    const char *end;//point to next character
} Token;

Token getToken(const char **sp, char sep){
    const char *s = *sp;
    const char *top, *end;
    Token token = { NULL, NULL};

    while(*s && *s == sep)//skip top separators
        ++s;
    if(!*s){
        *sp = s;
        return token;
    }
    token.top = s;
    while(*s && *s != sep){
        if(*s == ESCAPE)
            ++s;
        else if(*s == '"'){
            char *p = strchr(s + 1, '"');//search end '"'
            while(p && p[-1] == ESCAPE)
                p = strchr(p + 1, '"');
            if(p)
                s = p;
        }
        ++s;
    }
    token.end = s;
    *sp = s;

    return token;
}

int count_words(const char *s, char sep){
    int count = 0;
    Token token = getToken(&s, sep);

    while(token.top != NULL){
        ++count;
        token = getToken(&s, sep);
    }
    return count;
}

char *remove_escape(char *s){
    char *from, *to;
    from = to = s;
    while(*from){
        if(*from != ESCAPE)
            *to++ = *from;
        ++from;
    }
    *to = 0;
    return s;
}

char *ft_strsub(Token token){
    size_t len = token.end - token.top;
    char *sub = malloc(len + 1);
    if (sub){
        memcpy(sub, token.top, len);
        sub[len] = 0;
    }
    return sub;
}

char **ft_strsplit(const char *s, char sep){
    int words;

    if (!s || !sep || !(words = count_words(s, sep)))
        return NULL;

    char **result = malloc(sizeof(char *) * (words + 1));
    if(!result){
        perror("malloc");
        return NULL;
    }

    Token token = getToken(&s, sep);
    int i = 0;

    while(token.top != NULL){
        result[i] = ft_strsub(token);
        remove_escape(result[i++]);
        token = getToken(&s, sep);
    }
    result[i] = NULL;

    return result;
}

void test(const char *text){
    printf("original:%s\n", text);
    printf("result of split:\n");
    char **s = ft_strsplit(text, ' ');
    int i = 0;
    while (s[i]){
        printf("%s\n", s[i]);
        free(s[i++]);
    }
    free(s);
    puts("");
}

int main(int argc, char **argv){
    test("Hello world \"not split\"");
    test("Hello world \"not \\\" split\"");//include " in "..."
    test("Hello world not\\ split");//escape separator

    return 0;
}

result:

original:Hello world "not split"
result of split:
Hello
world
"not split"

original:Hello world "not \" split"
result of split:
Hello
world
"not " split"

original:Hello world not\ split
result of split:
Hello
world
not split
BLUEPIXY
  • 39,699
  • 7
  • 33
  • 70