2

The sample code has a very small rule that parses a pipe ('|') delimited string. The fields of the string cannot contain any of the characters '|', '!', '(', ')' or '*'.

When I compile the program (ex.cpp) without the -O3 flag, it works fine. The g++ version is 5.2.

g++ -std=c++14 -I ${INC} -o ex.out ex.cpp

Note INC points to boost/1.77.0/include

./ex.out ABC
-----------------------------------------
processing string: "ABC"
<str_rule>
  <try>ABC</try>
  <success></success>
  <attributes>[]</attributes>
</str_rule>
ABC: parse succeeded:


However, when I compile it with the -O3 flag, it crashes (I added -g so that I can run gdb):

g++ -std=c++14 -I ${INC} -O3 -g -o ex.out ex.cpp
./ex.out ABC
-----------------------------------------
processing string: "ABC"
Segmentation fault (core dumped)

// ex.cpp below

//#define BOOST_SPIRIT_DEBUG

#include <boost/spirit/home/qi/char/char.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/spirit/include/qi_symbols.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>
#include <boost/lexical_cast.hpp>

#include <string>


namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;

// Grammar for a '|'-delimited list of fields, where a field is any (possibly
// empty) run of characters other than '|', '!', '(', ')' and '*'.
// The grammar exposes no attribute; `Skipper` defaults to qi::space_type.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It,  Skipper> {
    parser() : parser::base_type(str_rule) {

        using namespace qi;
        // NOTE(review): this local `str` shadows the member rule `str` declared
        // below. Keeping an un-copied Qi expression in an `auto` variable leaves
        // it holding references to temporaries that are destroyed at the end of
        // this statement, so using `str` on the next line is undefined behaviour.
        // Wrapping the expression in qi::copy(...) avoids the dangling references.
        auto const str = *(char_ - '|' - '!' - '(' - ')' - '*') ;
        // List of fields separated by a literal '|'.
        str_rule  =  (str % char_('|'));

        // Names the rule; enables tracing when BOOST_SPIRIT_DEBUG is defined.
        BOOST_SPIRIT_DEBUG_NODE( str_rule );
    }

    private:
        // Never assigned or used by the constructor: the local `str` there hides it.
        qi::rule<It,  std::string(), Skipper> str;
        // Start rule of the grammar (passed to base_type above).
        qi::rule<It,  Skipper> str_rule;
};

int main(int argc, const char*argv[] )
{
    if( argc != 2 ) {
      std::cerr << "Usage: main.out <str rule>\n";
      return -1;
    }

    std::string r = argv[1];

    std::cout << "-----------------------------------------"  << std::endl;
    std::cout << "processing string: " << "\"" << r << "\""  << std::endl;
    typedef std::string::const_iterator It;
    It f(r.begin()), l(r.end());
    parser<It> p;

    try {
      bool ok = qi::phrase_parse(f,l,p ,qi::space);

      if (ok && f == l ) {
        std::cout << r << ": parse succeeded: \n";
      } else {
         std::cout << r << " failed to parse \n";
      }

    } catch (const qi::expectation_failure<It>& e) {
        std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'\n";
    }

    return 0;
}

The code should also work with the -O3 optimization.

I also tried -O2 and -O1, and it crashes with both of them.

I tried g++ version 12.2 and it still crashes.

I tried Boost version 1.53 and it still crashes.

The gdb backtrace is shown below; I do not quite understand it.

#0  0x0000000000401f29 in function_base (this=0x7ffda94834c8) at /data/tools/boost/1.77.0/include/boost/function/function_base.hpp:603
603       function_base() : vtable(0) { }
Missing separate debuginfos, use: debuginfo-install glibc-2.17-292.el7.x86_64
(gdb) where
#0  0x0000000000401f29 in function_base (this=0x7ffda94834c8) at /data/tools/boost/1.77.0/include/boost/function/function_base.hpp:603
#1  function4 (this=0x7ffda94834c8) at /data/tools/boost/1.77.0/include/boost/function/function_template.hpp:706
#2  function (this=0x7ffda94834c8) at /data/tools/boost/1.77.0/include/boost/function/function_template.hpp:1076
#3  rule (name="", this=0x7ffda94834a0) at /data/tools/boost/1.77.0/include/boost/spirit/home/qi/nonterminal/rule.hpp:168
#4  parser<__gnu_cxx::__normal_iterator<char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::proto::exprns_::expr<boost::proto::tagns_::tag::terminal, boost::proto::argsns_::term<boost::spirit::tag::char_code<boost::spirit::tag::space, boost::spirit::char_encoding::standard> >, 0l> >::parser
    (this=0x7ffda9483430) at ex.cpp:20
#5  0x000000000040122f in main (argc=<optimized out>, argv=<optimized out>) at ex.cpp:48

Is there something I am doing wrong?

user4581301
  • 33,082
  • 7
  • 33
  • 54

1 Answer

1

This is an oft-repeated¹ trap:

    auto const str = *(char_ - '|' - '!' - '(' - ')' - '*');

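This creates a Proto expression that holds references to its sub-expressions. Those sub-expressions are temporaries that are destroyed at the end of the statement, so the stored references dangle. Everything after that is Undefined Behaviour, which is why the program may appear to work in an unoptimized build and crash in an optimized one.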

Fix it using proto::deep_copy, or the alias qi::copy:

// copy (qi::copy) deep-copies the expression, so `str` holds its sub-expressions
// by value instead of referring to temporaries that no longer exist.
auto const str = copy(*(char_ - '|' - '!' - '(' - ')' - '*'));
str_rule       = str % char_('|');

Next, simplify the rule: express the excluded characters as a single negated character set, and use a plain literal for the % delimiter:

// Builds the grammar with `str_rule` as its start rule.
parser() : parser::base_type(str_rule) {
    using namespace qi;

    // A field is any (possibly empty) run of characters outside the set "|!()*";
    // fields are separated by a literal '|'.
    str_rule = (*~char_("|!()*")) % lit('|');

    BOOST_SPIRIT_DEBUG_NODE(str_rule);
}

Live Simplified

#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;

// Skipper-less, attribute-less grammar: '|'-separated fields, where a field is
// any (possibly empty) run of characters other than '|', '!', '(', ')' and '*'.
template <typename It>
struct parser : qi::grammar<It> {
    qi::rule<It> start;

    parser() : parser::base_type(start)
    {
        start = (*~qi::char_("|!()*")) % qi::lit('|');
    }
};

int main() {
    using It = std::string::const_iterator;
    parser<It> const p;

    for (std::string const r : {"", "|", "a|!()|"})
        try {
            std::cout << "input " << quoted(r);
            It   f(r.begin()), l(r.end());
            bool ok = qi::parse(f, l, p);

            if (ok && f == l)
                std::cout << r << ": parse succeeded\n";
            else
                std::cout << r << ": failed\n";
        } catch (qi::expectation_failure<It> const& e) {
            std::cerr << "expectation_failure at " << quoted(std::string(e.first, e.last)) << "\n";
        }
}

Prints

input "": parse succeeded
input "|"|: parse succeeded
input "a|!()|"a|!()|: failed

BONUS

If you don't enjoy this kind of UB and long compile times, consider using Spirit X3:

Also Live

#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;

// Spirit X3 version: runs the same '|'-separated-fields parser over a fixed set
// of sample inputs and prints whether each one was matched completely.
int main() {
    auto const grammar = *~x3::char_("|!()*") % '|';

    for (std::string const input : {"", "|", "a|!()|"}) {
        try {
            std::cout << "input " << std::quoted(input);

            auto       first = input.begin();
            auto const last  = input.end();
            bool const matched = x3::parse(first, last, grammar);

            // The raw input is streamed again here, directly after its quoted
            // form and without a separator, followed by the verdict.
            std::cout << input << (matched && first == last ? ": parse succeeded\n" : ": failed\n");
        } catch (x3::expectation_failure<std::string::const_iterator> const& e) {
            std::cerr << "expectation_failure at " << std::quoted(std::string(e.where(), input.end())) << "\n";
        }
    }
}

Same output


¹ See, for example: "inconsistent behavior of boost spirit grammar"; "Assigning parsers to auto variables"; "boost spirit V2 qi bug associated with optimization level"; and others.

sehe
  • 374,641
  • 47
  • 450
  • 633