3

I would like to check whether a string follows a certain pattern. I tried sscanf, but I do not get the desired result.

The pattern is simple: it consists of:

  • the string "while" followed by
  • one or more spaces, followed by
  • a string made of alpha characters or the underscore character, followed by
  • zero or more spaces, followed by
  • a colon (':'), followed by
  • the newline character ('\n')

Pattern examples:

  • while condition_a:
  • while test_b :

I tried the following, but it does not check for the colon:

sscanf(string, "while %[a-z,_]s %[:]c", test, column);

Would you have any suggestion?

Mateusz Piotrowski
  • 8,029
  • 10
  • 53
  • 79
lcazarre
  • 713
  • 3
  • 9
  • 15

3 Answers

6

Seems pretty straightforward to implement. You need neither the unintuitive and quirky scanf() nor non-portable (and, frankly, horrible) regular expressions:

/*
 * Check whether s is exactly: "while", one or more spaces, a run of
 * letters/underscores, zero or more spaces, ':', '\n', end of string.
 *
 * s must point to a NUL-terminated string.
 * Returns 1 if s matches the pattern, 0 otherwise.
 */
int isValid(const char *s)
{
    // the string "while" followed by
    // (strncmp stops at the terminator, so an input shorter than five
    // characters is never read past its end, unlike memcmp)
    if (strncmp(s, "while", 5) != 0)
        return 0;

    s += 5;

    // one or more spaces, followed by
    // (only ' ' is accepted: isspace() would also let tabs and newlines through)
    if (*s != ' ')
        return 0;

    while (*++s == ' ')
        ;

    // a string made of alpha characters or the underscore character,
    // (I assumed zero or more)
    // (the cast keeps isalpha() well defined for negative char values)
    while (isalpha((unsigned char)*s) || *s == '_')
        s++;

    // followed by zero or more spaces
    while (*s == ' ')
        s++;

    // followed by a colon (':'),
    if (*s++ != ':')
        return 0;

    // followed by the newline character ('\n')
    if (*s++ != '\n')
        return 0;

    // here should be the end
    return !*s;
}
1

This test for your pattern seems to work:

   // n stays 0 unless scanning gets all the way to the trailing %n.
   int n = 0;
   // Every conversion is suppressed with '*' and %n is not counted, so a scan
   // that assigns nothing returns 0. ok additionally requires that %n was
   // reached (n != 0) and that the match ended at the string terminator.
   // The blank before ':' in the format matches any amount of whitespace.
   Bool ok = sscanf(string, "while%*[ ]%*[A-Za-z_] :%*1[\n]%n", &n) == 0 && 
      n && !string[n];

It is nice and short, but there are (at least) two flaws:

  • It is ugly
  • It allows arbitrary whitespace before the colon, not just spaces (e.g. tabs, newlines)

The only way to handle zero or more spaces in sscanf is to use it twice, once for one or more, and then again for zero. For example, this code:

   // Zero-initialised so that the (at most three) characters stored by %3c
   // are always followed by a NUL when strcmp reads them.
   char tail[4] = "";
   // First attempt: one or more spaces between the identifier and the tail.
   // Second attempt: no spaces at all. Either way the captured tail must be
   // exactly ":\n".
   // NOTE(review): this relies on %3c succeeding when fewer than three
   // characters remain in the input - confirm for the target C library.
   Bool ok = (sscanf(string, "while%*[ ]%*[A-Za-z_]%*[ ]%3c", tail) == 1 || 
              sscanf(string, "while%*[ ]%*[A-Za-z_]%3c",      tail) == 1) && 
              !strcmp(tail, ":\n");
Joseph Quinsey
  • 9,553
  • 10
  • 54
  • 77
  • You should point out that the 'scan set' (`%[]`) is a conversion specification in its own right; you don't need to specify a `s` or a `c` after it. Your code handles that. You also insist on two spaces after `while` because `sizeof("while")` is 6 (the null at the end is counted). – Jonathan Leffler Jan 03 '14 at 21:16
1

A regular expression seems like a reasonable tool here:

#include <assert.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Run the compiled pattern against one input line and print the outcome:
 * the captured identifier on a match, "No match" otherwise, or the regex
 * library's error text if regexec itself fails.
 */
static void report_match(const regex_t *regex, const char *input)
{
    /* pmatch[0] is the whole match, pmatch[1] the identifier capture group. */
    regmatch_t pmatch[2];
    int rc = regexec(regex, input, sizeof pmatch / sizeof pmatch[0], pmatch, 0);

    if (rc == 0) {
        printf("Match: %.*s\n", (int)(pmatch[1].rm_eo - pmatch[1].rm_so), input + pmatch[1].rm_so);
    } else if (rc == REG_NOMATCH) {
        printf("No match\n");
    } else {
        char msgbuf[64];
        regerror(rc, regex, msgbuf, sizeof(msgbuf));
        printf("Regex match failed: %s\n", msgbuf);
    }
}

/*
 * Compile the "while <identifier> :\n" pattern once and test it against the
 * two example inputs. Returns EXIT_FAILURE if the pattern cannot be compiled.
 */
int main(void) {
    const char *expression = "^while +([a-zA-Z_]+) *:\n$";
    regex_t regex;

    /* Checked explicitly rather than with assert(), which disappears when
       NDEBUG is defined and would leave a failed compile undetected. */
    int rc = regcomp(&regex, expression, REG_EXTENDED);
    if (rc != 0) {
        char msgbuf[64];
        regerror(rc, &regex, msgbuf, sizeof(msgbuf));
        fprintf(stderr, "Regex compilation failed: %s\n", msgbuf);
        return EXIT_FAILURE;
    }

    report_match(&regex, "while condition_a:\n");
    report_match(&regex, "while test_b :\n");

    regfree(&regex);
    return EXIT_SUCCESS;
}

This will output:

Match: condition_a
Match: test_b
Bill Lynch
  • 80,138
  • 16
  • 128
  • 173