4

I have the following piece of code:

#include <gtest/gtest.h>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_eps.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/spirit/include/classic_core.hpp>
#include <boost/spirit/include/classic_confix.hpp>
#include <boost/spirit/include/classic_chset.hpp>
#include <boost/spirit/include/classic_utility.hpp>
#include <boost/fusion/include/cons.hpp>
#include <boost/fusion/include/at_c.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/bind.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/variant.hpp>

namespace spi = boost::spirit;
namespace qi = boost::spirit::qi;

// Parses one C-style comment with a grammar that accepts C++ ("//"), C
// ("/* ... */") and shell ("#") comments, and expects the synthesized
// std::string attribute to be identical to the input text.
TEST(TestBoost, cpp_comment)
{
    using qi::char_;
    using qi::omit;
    using qi::eoi;
    typedef std::string::const_iterator iter;

    // Input under test: a complete C-style comment with no trailing newline.
    const std::string example = "/* this should be ignored */";
    auto b = std::begin(example);
    auto e = std::end(example);

    // Every rule below exposes std::string, the same attribute type as `comment`.

    // "//", then every character up to (not including) a newline, terminated
    // by the newline itself or by end of input (eoi contributes no character).
    qi::rule<iter, std::string()> cpp_comment = char_('/') >> char_('/') >> *(char_ - '\n') >> (char_('\n') | omit[eoi]);
    // "/*", then every character up to the first "*/", then the closing "*/".
    qi::rule<iter, std::string()> c_comment = char_('/') >> char_('*') >> *(char_ - "*/") >> char_('*') >> char_('/');
    // "#", then the rest of the line, terminated like cpp_comment.
    qi::rule<iter, std::string()> shell_comment = char_('#') >> *(char_ - '\n') >> (char_('\n') | omit[eoi]);
    // The alternatives are tried in the order written: cpp, then c, then shell.
    qi::rule<iter, std::string()> comment = cpp_comment
            | c_comment
            | shell_comment
            ;

    std::string result;

    // The parse must succeed, ...
    EXPECT_TRUE(qi::parse(b, e, comment, result));
    // ... consume the whole input (qi::parse advances `b`), ...
    EXPECT_EQ(b, e);
    // ... and reproduce the input text in the attribute.
    EXPECT_EQ(result, example);
}

that fails with the following error:

[----------] 1 test from TestBoost
[ RUN      ] TestBoost.cpp_comment
tests/spirit.cpp:56: Failure
      Expected: result
      Which is: "//* this should be ignored */"
To be equal to: example
      Which is: "/* this should be ignored */"
[  FAILED  ] TestBoost.cpp_comment (0 ms)
[----------] 1 test from TestBoost (0 ms total)

and I don't understand why. This behavior is probably mentioned somewhere in the Boost documentation, but I can't find it. Does anybody know why this happens?

If I add a semantic action like this:

// Variant 1: the semantic action on each alternative assigns that
// alternative's own attribute (_1) to the rule's attribute (_val).
qi::rule<iter, std::string()> comment = cpp_comment[spi::_val = spi::_1]
            | c_comment[spi::_val = spi::_1]
            | shell_comment[spi::_val = spi::_1]
            ;

or this

// Variant 2: the semantic action on each alternative appends that
// alternative's own attribute (_1) to the rule's attribute (_val).
qi::rule<iter, std::string()> comment = cpp_comment[spi::_val += spi::_1]
            | c_comment[spi::_val += spi::_1]
            | shell_comment[spi::_val += spi::_1]
            ;

it works, but I would really like to know why the original code does not work.

sbi
  • 219,715
  • 46
  • 258
  • 445
Sasa
  • 1,597
  • 4
  • 16
  • 33
  • Please don't use unnecessary things like Google Test or dozens of includes in your minimal code. – sehe Nov 02 '16 at 02:23

1 Answers1

3

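This is the classical problem with backtracking container attributes: `cpp_comment` matches the leading '/' and appends it to the shared `std::string` attribute before it fails on the second `char_('/')`. The alternative then backtracks the input position and tries `c_comment`, but the attribute is not rolled back, so the stray '/' stays in front of the result.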

The idea here is to use qi::hold, or even better in this case, use qi::raw because it looks like you want to expose the entire matched input sequence as the attribute:

// Only `comment` has to expose an attribute: qi::raw disregards whatever its
// subject synthesizes and exposes the matched input range instead, so the
// three sub-rules are declared without an attribute and build no strings.
qi::rule<iter>
      cpp_comment   = "//" >> *~char_('\n')   >> (eol|eoi),   // "//" up to end of line or input
      c_comment     = "/*" >> *(char_ - "*/") >> "*/",        // "/*" up to the first "*/"
      shell_comment = '#'  >> *~char_('\n')   >> (eol|eoi);   // "#" up to end of line or input
qi::rule<iter, std::string()>
      comment       = qi::raw [ cpp_comment | c_comment | shell_comment ];

Live On Coliru

#include <cassert>
#include <iostream>
#include <string>

#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;

void test() {
    using qi::char_;
    using qi::eol;
    using qi::eoi;

    std::string const example = "/* this should be ignored */";

    qi::rule<std::string::const_iterator, std::string()>
          cpp_comment   = "//" >> *~char_('\n')   >> (eol|eoi),
          c_comment     = "/*" >> *(char_ - "*/") >> "*/",
          shell_comment = '#'  >> *~char_('\n')   >> (eol|eoi),
          comment       = qi::raw [ cpp_comment | c_comment | shell_comment ];

    std::string result;

    bool ok = qi::parse(std::begin(example), std::end(example), comment >> eoi, result);
    assert(ok);

    std::cout << "expected: " << example << "\n";
    std::cout << "actual:   " << result << "\n";
    assert(result == example);
}

// Program entry point: runs the demonstration once.
int main()
{
    test();
    return 0;
}

Prints

expected: /* this should be ignored */
actual:   /* this should be ignored */
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Just realized: with `raw` you don't need to synthesize `std::string` on the subrules. – sehe Nov 02 '16 at 08:29
  • Thanks a lot for such a nice answer. I just realized that in my complex grammar there are potentially many places where this effect could occur, which is a scary thought. Besides the alternative operator, is there any other known operator where this could occur? – Sasa Nov 02 '16 at 21:10
  • It can occur for container attributes with non-atomical assignment. See the linked resources :) – sehe Nov 02 '16 at 21:15
  • OK, I will check it out once more. I still have one last question: if I put a new line in the example above (i.e. "/* this should \n be ignored */"), then I only get a partial match. Why? My assumption was that it matched cpp_comment, which is why I had changed my grammar. – Sasa Nov 02 '16 at 21:49
  • @Sasa I don't know what you mean. It's a match: http://coliru.stacked-crooked.com/a/62dad53319011f2c – sehe Nov 02 '16 at 22:41
  • sorry, I must have done something wrong. It works. Thanks a lot – Sasa Nov 03 '16 at 18:53