0

I have the following string that I am trying to parse for variables.

/* Form-encoded POST body to parse; note the empty value after "title=". */
char data[]="to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";

I started with strtok and the following code

char *to=parsePostData("to",data);

/*
 * Look up the value stored under key `s` in the '='/'&'-separated string `t`.
 *
 * A private copy of `t` is tokenized with strtok(). As soon as a token
 * compares equal to `s`, the next token obtained by splitting on '&' only is
 * returned.
 *
 * NOTE(review): the returned pointer refers into tCpy, which is a local
 * (automatic) array of this function, and no return statement is reached
 * when the loop ends without a match.
 */
char* parsePostData(char s[],char t[])
{
  char *postVal;  // declared but never used in this function
  char *pch;      // current token handed back by strtok()
  char tCpy[512];//Make a copy. Otherwise, strtok works on the char pointer, and original char array gets modified/ corrupted.
  strcpy(tCpy,t); // no length check: t is copied in full into the 512-byte array
  pch = strtok (tCpy,"=&");
  while (pch != NULL)
  {
      // Every token is compared against the key, whichever delimiter ended it.
      if(strcmp(pch,s)==0) {
            // Key found: the value runs up to the next '&' only.
            pch= strtok (NULL, "&");
                return pch;          
      }else{
        pch = strtok (NULL, "=&");  
      }
  }      
}

This works fine, except when it comes to consecutive delimiters, such as the one after "title". So I found this custom strtok_single implementation in the question "Need to know when no data appears between two token separators using strtok()":

/*
 * strtok()-style splitter in which every single delimiter ends a field, so
 * two adjacent delimiters produce an empty field between them.
 *
 * Pass the string on the first call and NULL afterwards to continue. The
 * scan position is kept in a function-local static, and the input is
 * modified in place (each delimiter found is overwritten with '\0').
 *
 * Returns the next field, or NULL when no further delimiter is found in the
 * remaining text; in that case the scan position is left where it was.
 */
char * strtok_single (char * str, char const * delims)
{
  static char *next = NULL;   /* where the following call resumes */
  char *stop;
  char *field;

  if (str != NULL)
    next = str;

  if (next == NULL)
    return NULL;

  stop = strpbrk (next, delims);
  if (stop == NULL)
    return NULL;              /* no delimiter ahead: nothing is handed back */

  field = next;
  *stop = '\0';               /* terminate the field in place */
  next  = stop + 1;           /* resume just past the delimiter */
  return field;
}

But with this, the problem is that I cannot get the value of "signature", as there is no & delimiter after it.

How can I get a mix of these two, so that I don't miss the last variable and I can also handle consecutive delimiters?

Community
  • 1
  • 1
aVC
  • 2,254
  • 2
  • 24
  • 46
  • Maybe look at http://stackoverflow.com/questions/16807188/strtok-analogue-in-c?rq=1. This will split on both "=&" and "&" in one pass, though I guess you'll get a spurious blank on "=&", if I understand what they're doing correctly. – mtrw May 18 '15 at 02:14
  • @mtrw I am working on arduino IDE. If possible I would like to not add more libraries for program size concerns. I hope there is a way to modify the above code to get the result, but if not I will look into the method you suggested. – aVC May 18 '15 at 02:21
  • @Cicada I am using Arduino IDE, and I am a beginner. I believe it uses C,C++ http://stackoverflow.com/a/11813275/903978 – aVC May 18 '15 at 02:29
  • @Cicada, C would be better. thanks :) – aVC May 18 '15 at 02:36
  • I used this one and it works for me. http://stackoverflow.com/a/3375658/903978 – aVC May 18 '15 at 03:26

2 Answers

4

There are two bugs lurking here. One is in strtok_single(). If you run it repeatedly, it does not return the last segment, after the = after signature, unlike strtok().

When that's fixed, there is still a problem with the code in parsePostData(); it returns a pointer to an automatic variable. The copy of the string must be handled differently; the simplest way (which is consistent with using strtok() rather than strtok_r() or strtok_s()) is to make the tCpy variable static.

Test program emt.c

This is a composite program that shows the problems and also a set of fixes. It applies different 'splitter' functions — functions with the same signature as strtok() — to the data. It demonstrates the bug in strtok_single() and that strtok_fixed() fixes that bug. It demonstrates that the code in parsePostData() works correctly when it is fixed and strtok_fixed() is used.

#include <stdio.h>
#include <string.h>

/*
 * Pointer to a tokenizer with the same signature as strtok(): the string to
 * split (or NULL on later calls) and the set of delimiter characters.
 * Used to pass strtok, strtok_single or strtok_fixed to the test functions.
 */
typedef char *(*Splitter)(char *str, const char *delims);

/*
 * strtok_single - behaviour as quoted in SO 30294129 (from SO 8705844)
 *
 * Splits on every single delimiter, so adjacent delimiters yield an empty
 * token. The input is modified in place and the scan position is kept in a
 * function-local static between calls (pass NULL to continue).
 *
 * Returns NULL as soon as no delimiter is left in the remaining text, which
 * means the text after the final delimiter is never returned.
 */
static char *strtok_single(char *str, char const *delims)
{
    static char *cursor = NULL;     /* resume position for the next call */

    if (str != NULL)
        cursor = str;

    if (cursor == NULL)
        return NULL;

    char *delim = strpbrk(cursor, delims);
    if (delim == NULL)
        return NULL;                /* cursor deliberately left unchanged */

    char *token = cursor;
    *delim = '\0';                  /* cut the token off at the delimiter */
    cursor = delim + 1;
    return token;
}

/*
 * strtok_fixed - fixed variation of strtok_single
 *
 * Splits `str` on every single character from `delims`; adjacent delimiters
 * therefore yield an empty token. Pass the string on the first call and NULL
 * on later calls. The input is modified in place and the scan position is
 * kept in a function-local static, so the function is not reentrant.
 *
 * Differences from strtok_single:
 *   - the text after the last delimiter is returned as the final token;
 *   - that final token is returned even when it is empty (e.g. the value in
 *     a trailing "key="), so an empty last field is not mistaken for the end
 *     of input.
 *
 * Returns the next token, or NULL once the final token has been handed out.
 */
static char *strtok_fixed(char *str, char const *delims)
{
    static char *src = NULL;    /* resume position; NULL once input is exhausted */

    if (str != NULL)
        src = str;

    if (src == NULL)
        return NULL;

    char *ret = src;            /* the token always starts at the resume position */
    char *p = strpbrk(src, delims);
    if (p != NULL)
    {
        *p = '\0';              /* terminate the token in place */
        src = p + 1;            /* continue just past the delimiter */
    }
    else
    {
        /* Last token: mark exhaustion explicitly rather than by pointing at
         * the terminator, so an empty trailing field is still returned. */
        src = NULL;
    }

    return ret;
}

/*
 * Raw test of splitter functions.
 *
 * Copies `t` into a static scratch buffer, splits the copy on '=' and '&'
 * with `splitter`, and prints every token in brackets. A "matches" line is
 * printed after each token that equals `s`.
 */
static void parsePostData1(const char *s, const char *t, Splitter splitter)
{
    static char tCpy[512];      /* the splitter writes into its input, so work on a copy */

    strcpy(tCpy, t);
    for (char *tok = splitter(tCpy, "=&"); tok != NULL; tok = splitter(NULL, "=&"))
    {
        printf("  [%s]\n", tok);
        if (strcmp(tok, s) != 0)
            continue;
        printf("matches %s\n", s);
    }
}

/*
 * Fixed version of parsePostData() from SO 30294129.
 *
 * Looks up key `s` in the '='/'&'-separated string `t`, using `splitter` as
 * the tokenizer. `t` itself is not modified; a copy is tokenized.
 *
 * Returns the token that follows the first token equal to `s` (split on '&'
 * only, so the value may contain '='), or NULL when no token matches or when
 * `t` does not fit into the internal buffer.
 *
 * The result points into a static buffer that is overwritten by the next
 * call, so copy it if it has to outlive that call.
 *
 * NOTE(review): every token is compared with `s`, not only those in key
 * position, so a value that is identical to the key would also match.
 */
static char *parsePostData2(const char *s, const char *t, Splitter splitter)
{
    static char tCpy[512];

    /* Reject input that would overflow the scratch buffer instead of
     * letting strcpy() write past its end. */
    if (strlen(t) >= sizeof tCpy)
        return NULL;
    strcpy(tCpy, t);

    char *pch = splitter(tCpy, "=&");
    while (pch != NULL)
    {
        if (strcmp(pch, s) == 0)
            return splitter(NULL, "&");     /* value runs up to the next '&' */
        pch = splitter(NULL, "=&");
    }
    return NULL;
}

/*
 * Substitute a printable marker for a null pointer. Passing NULL for a "%s"
 * conversion is undefined behaviour, even though some C libraries happen to
 * print "(null)"; this makes that output explicit and portable.
 */
static const char *printable(const char *s)
{
    return (s != NULL) ? s : "(null)";
}

/*
 * Composite test program.
 *
 * First dumps the raw token stream produced by strtok(), strtok_single() and
 * strtok_fixed() for the sample data, then looks up every tag with each of
 * the three splitters and prints the value found.
 */
int main(void)
{
    char data[] = "to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";
    char *tags[] = { "to", "cc", "title", "content", "signature" };
    enum { NUM_TAGS = sizeof(tags) / sizeof(tags[0]) };

    printf("\nCompare variants on strtok()\n");
    {
        /* Use the last tag so the "matches" line shows up near the end. */
        int i = NUM_TAGS - 1;
        printf("strtok():\n");
        parsePostData1(tags[i], data, strtok);
        printf("strtok_single():\n");
        parsePostData1(tags[i], data, strtok_single);
        printf("strtok_fixed():\n");
        parsePostData1(tags[i], data, strtok_fixed);
    }

    printf("\nCompare variants on strtok()\n");
    for (int i = 0; i < NUM_TAGS; i++)
    {
        /* A lookup may return NULL (key not found), hence printable(). */
        char *value1 = parsePostData2(tags[i], data, strtok);
        printf("strtok: [%s] = [%s]\n", tags[i], printable(value1));
        char *value2 = parsePostData2(tags[i], data, strtok_single);
        printf("single: [%s] = [%s]\n", tags[i], printable(value2));
        char *value3 = parsePostData2(tags[i], data, strtok_fixed);
        printf("fixed:  [%s] = [%s]\n", tags[i], printable(value3));
    }

    return 0;
}

Example output from emt

Compare variants on strtok()
strtok():
  [to]
  [myself@gmail.com]
  [cc]
  [youself@gmail.com]
  [title]
  [content]
  [how are you?]
  [signature]
matches signature
  [best regards.]
strtok_single():
  [to]
  [myself@gmail.com]
  [cc]
  [youself@gmail.com]
  [title]
  []
  [content]
  [how are you?]
  [signature]
matches signature
strtok_fixed():
  [to]
  [myself@gmail.com]
  [cc]
  [youself@gmail.com]
  [title]
  []
  [content]
  [how are you?]
  [signature]
matches signature
  [best regards.]

And:

Compare variants on strtok()
✓ strtok: [to] = [myself@gmail.com]
✓ single: [to] = [myself@gmail.com]
✓ fixed:  [to] = [myself@gmail.com]
✓ strtok: [cc] = [youself@gmail.com]
✓ single: [cc] = [youself@gmail.com]
✓ fixed:  [cc] = [youself@gmail.com]
✕ strtok: [title] = [content=how are you?]
✓ single: [title] = []
✓ fixed:  [title] = []
✓ strtok: [content] = [how are you?]
✓ single: [content] = [how are you?]
✓ fixed:  [content] = [how are you?]
✓ strtok: [signature] = [best regards.]
✕ single: [signature] = [(null)]
✓ fixed:  [signature] = [best regards.]

The correct (✓ = U+2713) and incorrect (✕ = U+2715) marks were added manually when posting the answer.

Observe how only the lines tagged 'fixed' contain exactly what is wanted each time around.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
1

You haven't exactly told us what you mean by "this works fine", though it seems sufficient to say that you want to parse an application/x-www-form-urlencoded string. Why didn't you say so in the first place?

Consider that the first field, key, may be terminated by the first of either '=' or '&'. It would be appropriate to search for a token that ends in either of those characters, to extract key.

The second field, value, however, isn't terminated by an '=' character, so it's inappropriate to be searching for that character to extract value. You'd want to search for '&' only.

Sure. You could use strtok to parse this, however I'm sure there are many more suitable tools. strcspn, for example, won't make any changes to data, which means you won't need to make a copy of data as you are...

#include <stdio.h>
#include <string.h>

int main(void) {
    char data[]="to=myself@gmail.com&cc=youself@gmail.com&title=&content=how are you?&signature=best regards.";

    /* Walk the string one key/value pair at a time without modifying it:
     * only lengths are measured, and printf's "%.*s" prints the slices. */
    char *cursor = data;
    for (;;) {
        /* The key ends at the first '&' or '=' (or at the end of the string). */
        size_t key_len = strcspn(cursor, "&=");

        /* The value starts after the '=' if there is one, and ends at '&'. */
        char *val = cursor + key_len;
        if (*val == '=')
            val++;
        size_t val_len = strcspn(val, "&");

        printf("Key:   %.*s\n"
               "Value: %.*s\n\n",
               (int)key_len, cursor,
               (int)val_len, val);

        /* Step over the '&' separating this pair from the next, if any. */
        cursor = val + val_len;
        if (*cursor == '&')
            cursor++;

        if (*cursor == '\0')
            break;
    }
    return 0;
}
autistic
  • 1
  • 3
  • 35
  • 80
  • By 'it works fine', I meant it does the job of finding the value of the given key. Sorry, it didn't cross my mind that adding "application/x-www-form-urlencoded string" was important. Thanks very much for your answer. – aVC May 19 '15 at 04:37
  • 1
    The way I see it is, if a piece of code occasionally fails, then that piece of code *doesn't work*... – autistic May 19 '15 at 10:07
  • OK, got it. To me, 'occasionally' sounds more like 'unpredictable', which is different in my case, as I know when it fails. – aVC May 19 '15 at 18:00