1

Thank you in advance!

I'd like to implement a code to analysis ipv4 address format such as "192.168.0.0".

So, I did like this.

#include <stdio.h>
#include <string.h>

typedef struct qppLexerObj
{
    char *mCursor;
    char *mLimit;
    char *mToken;

} qppLexerObj;

int qpfGetOctet(qppLexerObj *aLexer)
{
#define YYFILL(a)                                       \
    do                                                  \
    {                                                   \
        if (aLexer->mCursor > aLexer->mLimit)           \
        {                                               \
            return 0;           \
        }                                               \
        else                                            \
        {                                               \
        }                                               \
    } while (0);

    /*!re2c

      re2c:define:YYCTYPE     = "unsigned char";
      re2c:define:YYCURSOR    = aLexer->mCursor;
      re2c:define:YYLIMIT     = aLexer->mLimit;
      re2c:yyfill:enable      = 0;
      re2c:yyfill:enable      = 1;

      digit   = [0-9];
    */
begin:
    aLexer->mToken = aLexer->mCursor;
    /*!re2c

      digit+ { return 1; }
      [\.]        { return 2;      }
      [\*]        { return 3;     }
      .           { return 9999;  }
    */
}


int main()
{
    qppLexerObj aObj;
    int a;
    char sToken[512];
    char *sBuffer = "255.255.255.255";

    aObj.mCursor = sBuffer;
    aObj.mLimit  = aObj.mCursor + strlen(sBuffer);

    while ( (a = qpfGetOctet(&aObj)) != 0)
    {
        int len;

        len = aObj.mCursor - aObj.mToken;

        memset(sToken, 0, sizeof(sToken));
        strncpy(sToken, aObj.mToken, len);
        printf("Token = %d(%d) [%s]\n", a, len, sToken);
    }

    return 0;
}

But, the result is not what I expected.

re2c --case-insensitive -o addr_so.c addr_so.re
gcc -g -o addr_so addr_so.c

Token = 1(3) [255]
Token = 2(1) [.]
Token = 1(3) [255]
Token = 2(1) [.]
Token = 1(3) [255]
Token = 2(1) [.]
Token = 1(3) [255]
Token = 9999(1) []     <=== Wrong code happens!! SHOULD BE 0!

How can I detect the EOF without "9999", wrong code?

re2c seems that it sometimes can't detect EOF successfully.

1 Answers1

1

All looks logical. Just the terminating test is amiss. Recall that limit is set to first address past a valid character, as the re2c man page says to do.

-        if (aLexer->mCursor > aLexer->mLimit)           \
+        if (aLexer->mCursor >= aLexer->mLimit)           \

With this one line change, I get the results I think you expected.

Token = 1(3) [255]
Token = 2(1) [.]
Token = 1(3) [255]
Token = 2(1) [.]
Token = 1(3) [255]
Token = 2(1) [.]
WeakPointer
  • 3,087
  • 27
  • 22