2

I have the following C code, which violates MISRA C:2012 Rule 4.1 (i.e., octal and hexadecimal escape sequences shall be terminated). It is not clear to me what the right compliant solution is for the following code.

static const char rcsid[] = "\n@(#) $Revision: 243501 $\n";

and

rerrs[] = {
    { REG_NOMATCH,  "REG_NOMATCH",  "llvm_regexec() failed to match" },
    { REG_BADPAT,   "REG_BADPAT",   "invalid regular expression" },
    { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
    { REG_ECTYPE,   "REG_ECTYPE",   "invalid character class" },
    { REG_EESCAPE,  "REG_EESCAPE",  "trailing backslash (\\)" },
    { REG_ESUBREG,  "REG_ESUBREG",  "invalid backreference number" },
    { REG_EBRACK,   "REG_EBRACK",   "brackets ([ ]) not balanced" },
    { REG_EPAREN,   "REG_EPAREN",   "parentheses not balanced" },
    { REG_EBRACE,   "REG_EBRACE",   "braces not balanced" },
    { REG_BADBR,    "REG_BADBR",    "invalid repetition count(s)" },
    { REG_ERANGE,   "REG_ERANGE",   "invalid character range" },
    { REG_ESPACE,   "REG_ESPACE",   "out of memory" },
    { REG_BADRPT,   "REG_BADRPT",   "repetition-operator operand invalid" },
    { REG_EMPTY,    "REG_EMPTY",    "empty (sub)expression" },
    { REG_ASSERT,   "REG_ASSERT",   "\"can't happen\" -- you found a bug" },
    { REG_INVARG,   "REG_INVARG",   "invalid argument to regex routine" },
    { 0,        "",     "*** unknown regexp error code ***" }
};

For this one, the Understand tool from SciTools reports the violation on line 58:

enter image description here

The complete code for the second violation is:

/*-
 * This code is derived from OpenBSD's libc/regex, original license follows:
 *
 * Copyright (c) 1992, 1993, 1994 Henry Spencer.
 * Copyright (c) 1992, 1993, 1994
 *  The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Henry Spencer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *  @(#)regerror.c  8.4 (Berkeley) 3/20/94
 */

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include "regex_impl.h"

#include "regutils.h"

#ifdef _MSC_VER
#define snprintf _snprintf
#endif

/* Forward declaration: regatoi() is defined after llvm_regerror(), which calls it when errcode == REG_ATOI. */
static const char *regatoi(const llvm_regex_t *, char *, int);

/*
 * One row of the error table: a REG_* error code together with its
 * symbolic name and a human-readable explanation.
 */
struct rerr {
    int code;            /* REG_* error number; 0 marks the last row */
    const char *name;    /* the code's symbolic name, as a string */
    const char *explain; /* message describing the error */
};

/*
 * Error table scanned linearly by llvm_regerror() and regatoi().  Both scans
 * stop at the row whose code is 0, so that row must stay last; its strings
 * are what a lookup ends up with when no other row matches.
 */
static struct rerr rerrs[] = {
    { REG_NOMATCH,  "REG_NOMATCH",  "llvm_regexec() failed to match" },
    { REG_BADPAT,   "REG_BADPAT",   "invalid regular expression" },
    { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
    { REG_ECTYPE,   "REG_ECTYPE",   "invalid character class" },
    { REG_EESCAPE,  "REG_EESCAPE",  "trailing backslash (\\)" },
    { REG_ESUBREG,  "REG_ESUBREG",  "invalid backreference number" },
    { REG_EBRACK,   "REG_EBRACK",   "brackets ([ ]) not balanced" },
    { REG_EPAREN,   "REG_EPAREN",   "parentheses not balanced" },
    { REG_EBRACE,   "REG_EBRACE",   "braces not balanced" },
    { REG_BADBR,    "REG_BADBR",    "invalid repetition count(s)" },
    { REG_ERANGE,   "REG_ERANGE",   "invalid character range" },
    { REG_ESPACE,   "REG_ESPACE",   "out of memory" },
    { REG_BADRPT,   "REG_BADRPT",   "repetition-operator operand invalid" },
    { REG_EMPTY,    "REG_EMPTY",    "empty (sub)expression" },
    { REG_ASSERT,   "REG_ASSERT",   "\"can't happen\" -- you found a bug" },
    { REG_INVARG,   "REG_INVARG",   "invalid argument to regex routine" },
    /* terminator / fallback row */
    { 0,            "",             "*** unknown regexp error code ***" }
};

/*
 - llvm_regerror - the interface to error numbers
 = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
 *
 * Produces the text associated with errcode and copies it into errbuf.
 *
 *   errcode      a REG_* code, optionally OR-ed with REG_ITOA, or REG_ATOI
 *   preg         only consulted (via regatoi) when errcode == REG_ATOI
 *   errbuf       destination buffer; left untouched when errbuf_size is 0
 *   errbuf_size  capacity of errbuf in bytes
 *
 * Text selection:
 *   - errcode == REG_ATOI: the string returned by regatoi().
 *   - REG_ITOA bit set: the symbolic name of the code, or "REG_0x<hex>" when
 *     the code is not in the table.
 *   - otherwise: the explanation string of the matching table row (the
 *     terminator row's explanation when nothing matches).
 *
 * Returns strlen(text) + 1 for the complete text, independent of
 * errbuf_size.
 */
/* ARGSUSED */
size_t
llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
{
    int wanted = errcode & ~REG_ITOA;   /* code with the REG_ITOA flag stripped */
    char scratch[50];                   /* holds generated (non-table) text */
    const char *text;
    struct rerr *entry;
    size_t needed;

    if (errcode == REG_ATOI) {
        text = regatoi(preg, scratch, sizeof scratch);
    } else {
        /* Stop on the matching row, or on the terminator row (code 0). */
        entry = rerrs;
        while (entry->code != 0 && entry->code != wanted) {
            entry++;
        }

        if ((errcode & REG_ITOA) == 0) {
            text = entry->explain;
        } else {
            if (entry->code == 0) {
                /* Unknown code: synthesize a name from its numeric value. */
                (void)snprintf(scratch, sizeof scratch,
                    "REG_0x%x", wanted);
            } else {
                assert(strlen(entry->name) < sizeof(scratch));
                (void) llvm_strlcpy(scratch, entry->name, sizeof scratch);
            }
            text = scratch;
        }
    }

    needed = strlen(text) + 1;

    /* NOTE(review): llvm_strlcpy presumably truncates to errbuf_size and
     * NUL-terminates like BSD strlcpy -- confirm against its definition. */
    if (errbuf_size > 0) {
        llvm_strlcpy(errbuf, text, errbuf_size);
    }

    return needed;
}

/*
 - regatoi - internal routine to implement REG_ATOI
 *
 * Searches the error table for the row whose symbolic name equals the string
 * in preg->re_endp.  On a match, the row's numeric code is formatted in
 * decimal into localbuf (capacity localbufsize) and localbuf is returned.
 * Without a match, the string literal "0" is returned and localbuf is not
 * written.
 */
static const char *
regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
{
    struct rerr *entry = rerrs;

    /* Advance until the name matches or the terminator row (code 0) is hit. */
    while (entry->code != 0 && strcmp(entry->name, preg->re_endp) != 0) {
        entry++;
    }

    if (entry->code != 0) {
        (void)snprintf(localbuf, localbufsize, "%d", entry->code);
        return localbuf;
    }

    return "0";
}
Gianni Spear
  • 7,033
  • 22
  • 82
  • 131
  • 1
    What line does it point to? I don't see any octal or hex escape sequences. – Eugene Sh. Feb 11 '19 at 18:45
  • 1
    Try this: `static const char rcsid[] = "\n" "@(#) $Revision: 243501 $\n";` if I understand the issue the rule is talking about correctly... – Eugene Sh. Feb 11 '19 at 18:47
  • @EugeneSh. technically a `0` is a single octal digit – Mgetz Feb 11 '19 at 18:47
  • @Mgetz I don't see escaped `\0` either – Eugene Sh. Feb 11 '19 at 18:49
  • Thanks @EugeneSh., `static const char rcsid[] = "\n" "@(#) $Revision: 243501 $\n";` is a compliant solution according to the MISRA checker that I am using – Gianni Spear Feb 11 '19 at 18:54
  • So does anyone understand what the MISRA verifier is actually complaining about here? – Steve Summit Feb 11 '19 at 18:56
  • 2
    @SteveSummit They don't like when an escape sequence is followed with unrelated characters without separation, causing confusion. I hate MISRA, by the way... – Eugene Sh. Feb 11 '19 at 18:57
  • @SteveSummit. I am honest, I didn't understand how to resolve this complain, also because the code comes from an another team, and I can not access to the total infos. – Gianni Spear Feb 11 '19 at 18:58
  • @EugeneSh. I thought it was something like that (and I agree with your opinion about MISRA), but: I still don't understand. What, under *any* interpretation, is "unterminated" about `\n`? (I guess this would make `printf("Hello\nworld")` similarly "unterminated".) – Steve Summit Feb 11 '19 at 18:59
  • @EugeneSh. I update my question. For example, the second violation is for me a mystery. The tool says line 58. I am hating MISRA too! – Gianni Spear Feb 11 '19 at 18:59
  • @GianniSpear Looks like your checker is buggy. Or you have made some modifications and the report is not pointing to the right places anymore – Eugene Sh. Feb 11 '19 at 19:01
  • @EugeneSh. often is precise, but when the code is "complex" the checker is not precise. – Gianni Spear Feb 11 '19 at 19:03
  • Well, probably it is complaining about all of the strings where you have escaped sequences followed by something else. You have few. Like `"trailing backslash (\\)"` -> `"trailing backslash (\\" ")"` – Eugene Sh. Feb 11 '19 at 19:03
  • I think it's looking at the "0" in the last initializer. The message "octal and hexadecimal escape sequences shall be terminated" suggests that it found such a sequence that is not terminated. I don't see how such a sequence could not be terminated. – dbush Feb 11 '19 at 19:04
  • @EugeneSh. I update my question with all file *.c with the violation. I don't know if your checker gives the same line – Gianni Spear Feb 11 '19 at 19:06
  • @GianniSpear I don't have any checker, so I can't verify :) Fix the strings so every escape sequence is either followed by space or at the end of the string literal. – Eugene Sh. Feb 11 '19 at 19:06
  • @EugeneSh. Sorry! – Gianni Spear Feb 11 '19 at 19:07
  • @EugeneSh. I didn't get your suggestion (again sorry) – Gianni Spear Feb 11 '19 at 19:10
  • 1
    This: `"trailing backslash (\\)"` -> `"trailing backslash (\\" ")"` and `"\"can't happen\" -- you found a bug"` -> `"\"" "can't happen\" -- you found a bug"` – Eugene Sh. Feb 11 '19 at 19:12

1 Answer

1

There are no octal/hex escape sequences in the code posted, so your static analyser is wrong.

\n, \" and \\ are "simple escape sequences", which MISRA-C has nothing to say about.

The line static const char rcsid[] = "\n@(#) $Revision: 243501 $\n"; is MISRA-C compliant.

Lundin
  • 195,001
  • 40
  • 254
  • 396
  • Technically [0 is an octal in C and C++](https://stackoverflow.com/a/6895543/332733) and [C answer](https://stackoverflow.com/a/26625392/332733). That said that's not an escape sequence. More than likely the MISRA tool runs against the preprocessed source and the actual sequence is in one of the macros. – Mgetz Feb 12 '19 at 14:01
  • @Mgetz MISRA-C allows the octal escape sequence zero `\0` as a special exception. But that's not relevant here. – Lundin Feb 12 '19 at 15:01