0

I am using a function that was posted as an answer to another Stack Overflow question. However, the user who posted it notes that it does not handle consecutive delimiters.

I am wondering how I can modify it so that it handles consecutive delimiters. Essentially, I want any extra delimiter to be ignored.

For example say I have something like this:

h2,3      d3,4 j3,3 y4,1 g4,3

I want to split this into an array of strings at each space, however as you can see in some cases there are multiple spaces. I simply want to ignore the extra delimiters.

Edit: Just to make it clear this is the code I am using from the answer I linked to above:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/*
 * Split a_str into tokens separated by the character a_delim.
 *
 * Runs of consecutive delimiters, as well as leading and trailing
 * delimiters, are ignored: they never produce empty tokens.
 *
 * a_str is only read; it is not modified.
 *
 * Returns a newly allocated array of newly allocated strings, terminated
 * by a NULL pointer. An input without any token yields an array holding
 * only the NULL terminator. The caller must free() every string and then
 * the array itself.
 *
 * Returns NULL if a_str is NULL or if an allocation fails (nothing is
 * leaked in that case).
 */
char** str_split(char* a_str, const char a_delim)
{
    char** result = NULL;
    size_t count  = 0;
    size_t idx    = 0;
    const char* p;

    if (a_str == NULL)
    {
        return NULL;
    }

    /* Count the tokens using the same rule that is used to extract
       them below, so both passes always agree. */
    p = a_str;
    while (*p != '\0')
    {
        if (*p == a_delim)
        {
            p++;
            continue;
        }
        count++;
        while (*p != '\0' && *p != a_delim)
        {
            p++;
        }
    }

    /* One extra slot for the terminating NULL pointer so the caller
       knows where the list of returned strings ends. */
    result = malloc(sizeof(char*) * (count + 1));
    if (result == NULL)
    {
        return NULL;
    }

    p = a_str;
    while (idx < count)
    {
        const char* start;
        size_t len;
        char* token;

        /* Skip the delimiter run in front of the next token. */
        while (*p != '\0' && *p == a_delim)
        {
            p++;
        }

        start = p;
        while (*p != '\0' && *p != a_delim)
        {
            p++;
        }
        len = (size_t)(p - start);

        token = malloc(len + 1);
        if (token == NULL)
        {
            /* Undo everything allocated so far. */
            while (idx > 0)
            {
                free(result[--idx]);
            }
            free(result);
            return NULL;
        }
        memcpy(token, start, len);
        token[len] = '\0';
        result[idx++] = token;
    }
    result[idx] = NULL;

    return result;
}

/* Demo: split a comma-separated list of month names, print each token,
   and release everything str_split() handed back. */
int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** parts;
    char** cur;

    printf("months=[%s]\n\n", months);

    parts = str_split(months, ',');
    if (parts == NULL)
    {
        /* Nothing was allocated, so there is nothing to print or free. */
        return 0;
    }

    /* Walk the NULL-terminated array, freeing each string once printed. */
    for (cur = parts; *cur != NULL; cur++)
    {
        printf("month=[%s]\n", *cur);
        free(*cur);
    }
    printf("\n");
    free(parts);

    return 0;
}
Community
  • 1
  • 1
ComputerLocus
  • 3,448
  • 10
  • 47
  • 96

3 Answers

2

This should do the trick:

/*
 * Split str at each run of `delimiter` characters.
 *
 * The input is copied into one heap buffer; the first delimiter of every
 * run in that copy is overwritten with '\0' and the returned pointers
 * point into the copy. A run of consecutive delimiters therefore acts as
 * a single separator.
 *
 * Note: a delimiter run at the very start yields an empty first token,
 * and a run at the very end yields an empty last token.
 *
 * Returns a NULL-terminated array of token pointers. ret[0] is always the
 * start of the copy, so the caller releases everything with
 * free(ret[0]); free(ret);
 *
 * Returns NULL if str is NULL or if an allocation fails.
 */
char** str_split(const char *str, char delimiter)
{
    size_t len, i, j;
    char* buf;
    char** ret;

    if (str == NULL)
        return NULL;

    len = strlen(str);
    buf = malloc(len + 1);
    if (buf == NULL)
        return NULL;
    memcpy(buf, str, len + 1);

    /* First pass: count tokens (one more than the number of delimiter runs). */
    j = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            while (buf[i + 1] == delimiter) i++;
            j++;
        }

    /* One extra slot for the terminating NULL pointer. */
    ret = malloc(sizeof(char*) * (j + 1));
    if (ret == NULL)
    {
        free(buf); /* do not leak the copy */
        return NULL;
    }
    ret[j] = NULL;

    /* Second pass: terminate each token and record where the next starts. */
    ret[0] = buf;
    j = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            buf[i] = '\0';
            while (buf[i + 1] == delimiter) i++;
            ret[j++] = &buf[i + 1];
        }
    return ret;
}

Remove the two lines while (buf[i + 1] == delimiter) i++; if you do not want runs of consecutive delimiters to be collapsed into a single separator; each delimiter will then start a new (possibly empty) token.

Havenard
  • 27,022
  • 5
  • 36
  • 62
1

The heavily voted answer in the SO question you linked says "note it does not handle consecutive delimiters", giving "JAN,,,FEB,MAR" as an example, but it does not substantiate that comment.

The function strsep() treats consecutive delimiters as containing an empty field, but the function strtok() does ignore multiple instances of (any combination of) the delimiter set. With MSVC, I get for this program

#include<stdio.h>
#include<string.h>

/* Demo: strtok() skips any mix of consecutive separator characters, so
   only the three month names are printed. */
int main(void)
{
    char months[] = "JAN, ,\t   , ,FEB,MAR";
    char seps[] = ", \t\r\n";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(months, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Month is: %s\n", tok);
    }
    return 0;
}

the output:

Month is: JAN
Month is: FEB
Month is: MAR

In your specific example (which I suspect might contain tabs), this would be

#include<stdio.h>
#include<string.h>

/* Demo: split the question's sample input on spaces and tabs; repeated
   separators are skipped by strtok(). */
int main(void)
{
    char stuff[]= "h2,3      d3,4 j3,3 y4,1 g4,3";
    char seps[] = " \t";
    char *tok;

    /* The first call passes the buffer; later calls pass NULL to continue. */
    for (tok = strtok(stuff, seps); tok != NULL; tok = strtok(NULL, seps)) {
        printf ("Stuff is: %s\n", tok);
    }
    return 0;
}

the output:

Stuff is: h2,3
Stuff is: d3,4
Stuff is: j3,3
Stuff is: y4,1
Stuff is: g4,3
Weather Vane
  • 33,872
  • 7
  • 36
  • 56
0

I think strtok() is capable of handling your requirement. From the man page

A sequence of two or more contiguous delimiter bytes in the parsed string is considered to be a single delimiter.

Sourav Ghosh
  • 133,132
  • 16
  • 183
  • 261