1

I am trying to combine multiple boost::spirit parse rules into bigger, composite rules, and have the following code:

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/adt/adapt_adt.hpp>
#include <boost/fusion/include/adapt_adt.hpp>
#include <boost/variant.hpp>

#include <string> 
#include <vector>
#include <fstream>


namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

struct PdCanvas {
    int topX;
    int topY;
    int wX;
    int wY;
    std::string name;
    int openOnLoad; 
};


struct PdArrayData {
    std::vector<float> data; 
};


BOOST_FUSION_ADAPT_STRUCT(
    PdCanvas,
    (int, topX)
    (int, topY)
    (int, wX) 
    (int, wY)
    (std::string, name)
    (int, openOnLoad));


BOOST_FUSION_ADAPT_STRUCT(
    PdArrayData,
    (std::vector<float>, data)); 


using PdRecord = boost::variant<
PdArrayData,
PdCanvas
>;


template <typename Iterator> struct PdCanvasGrammar : qi::grammar<Iterator, PdCanvas()> {
    PdCanvasGrammar() : PdCanvasGrammar::base_type(start) {
        using namespace qi;
        start      = skip(space)[canvasRule >> eoi];
        name       = +(('\\' >> space) |graph);
        canvasRule = "#N canvas" >> int_ >> int_ >> int_ >> int_ >> name >> int_ >> ';';

        BOOST_SPIRIT_DEBUG_NODES((start)(canvasRule)(name))
    }

  private:
    qi::rule<Iterator, PdCanvas()> start;
    qi::rule<Iterator, PdCanvas(), qi::space_type> canvasRule;
    qi::rule<Iterator, std::string()> name;
};


template <typename Iterator> 
struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), ascii::space_type> {
    PdRecordGrammar () : PdRecordGrammar::base_type(recordRule) {         
        using namespace qi;
        
        arrayDataRule = (lit("#A") >>  +(qi::float_) >> ";");
        canvasRule = PdCanvasGrammar<Iterator>();
        recordRule = (canvasRule |  (arrayDataRule)  );
        BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(canvasRule)(recordRule))
    }

    
    qi::rule<Iterator, PdArrayData(), ascii::space_type> arrayDataRule; 
    qi::rule<Iterator, PdCanvas()> canvasRule;     
    qi::rule<Iterator, PdRecord(), ascii::space_type> recordRule;     
};



int main(int argc, char** argv)
{
  if(argc != 2)
    {
        std::cout << "Usage: "  <<argv[0] << " <PatchFile>" << std::endl;
        exit(1); 
    }

    std::ifstream inputFile(argv[1]); 
    std::string inputString(std::istreambuf_iterator<char>(inputFile), {}); 

    PdRecord root;
    PdRecordGrammar<std::string::iterator> parser;
    std::cout << "Loaded file:\n " << inputString << std::endl;


    PdCanvas canvObj;
    PdCanvasGrammar<std::string::iterator> canvParse;
    bool canvSuccess = qi::phrase_parse(inputString.begin(), inputString.end(), canvParse, boost::spirit::ascii::space, canvObj);
    std::cout << "Canvas success: " << canvSuccess << std::endl; 

    bool success = qi::phrase_parse(inputString.begin(), inputString.end(), parser, boost::spirit::ascii::space, root); 
    std::cout << "Success: " << success << std::endl;

    return 0; 

}

I then tested the code on the following string, which should parse:

#N canvas 0 0 400 300 moo 1;

Running the code gives the following output:

Loaded file:
 #N canvas 0 0 400 300 moo 1;

<start>
  <try>#N canvas 0 0 400 30</try>
  <canvasRule>
    <try>#N canvas 0 0 400 30</try>
    <name>
      <try>moo 1;\n</try>
      <success> 1;\n</success>
      <attributes>[[m, o, o]]</attributes>
    </name>
    <success>\n</success>
    <attributes>[[0, 0, 400, 300, [m, o, o], 1]]</attributes>
  </canvasRule>
  <success></success>
  <attributes>[[0, 0, 400, 300, [m, o, o], 1]]</attributes>
</start>
Canvas success: 1
<recordRule>
  <try>#N canvas 0 0 400 30</try>
  <canvasRule>
    <try>#N canvas 0 0 400 30</try>
    <fail/>
  </canvasRule>
  <arrayDataRule>
    <try>#N canvas 0 0 400 30</try>
    <fail/>
  </arrayDataRule>
  <fail/>
</recordRule>
Success: 0

As one can see, the file is successfully parsed by the standalone PdCanvasGrammar rule, but fails to parse when I attempt to use the composite PdRecordGrammar rule.

I assume I'm doing something wrong in combining the rules together, but I don't know what.

As an aside, the arrayDataRule, which is defined as part of the PdRecordGrammar, successfully parses its input:

Loaded file:
 #A 1 2 3 4 5;
<start>
  <try>#A 1 2 3 4 5;</try>
  <canvasRule>
    <try>#A 1 2 3 4 5;</try>
    <fail/>
  </canvasRule>
  <fail/>
</start>
Canvas success: 0
<recordRule>
  <try>#A 1 2 3 4 5;</try>
  <canvasRule>
    <try>#A 1 2 3 4 5;</try>
    <fail/>
  </canvasRule>
  <arrayDataRule>
    <try>#A 1 2 3 4 5;</try>
    <success></success>
    <attributes>[[[1, 2, 3, 4, 5]]]</attributes>
  </arrayDataRule>
  <success></success>
  <attributes>[[[1, 2, 3, 4, 5]]]</attributes>
</recordRule>
Success: 1

So my assumption is that if I just took the PdCanvasGrammar rule and defined it intrinsically to the PdRecord rule, that it would work. But I'm trying to understand how to properly combine rules so that no single rule gets too large and unwieldy.

So my question is thus: Why does this rule as defined not work, and what is the proper way to combine rules into a composite rule?

Versions:

Boost: 1.75.0

C++ standard: 11

GCC: gcc version 4.8.5 20150623 (Red Hat 4.8.5-44) (GCC)

stix
  • 1,140
  • 13
  • 36
  • Here us MCVE: https://godbolt.org/z/WfxGM1zPK (output doesn't match :/). – Marek R Apr 06 '23 at 18:20
  • @MarekR I'm using boost 1.75 and c++11. Looks like you've got it set up for c++20 – stix Apr 06 '23 at 18:27
  • this is not the issue: https://godbolt.org/z/PGxP4e789 – Marek R Apr 06 '23 at 18:48
  • @MarekR Dunno what to say. It doesn't segfault for me. – stix Apr 06 '23 at 19:06
  • That's the nature of [UB](https://en.wikipedia.org/wiki/Undefined_behavior). It may even look like it's working. And then one sunday launch the nuclear missile by accident. – sehe Apr 07 '23 at 00:39
  • forgot uo add address sanitizer: https://godbolt.org/z/sovhfsGhM note it reports stack overflow – Marek R Apr 07 '23 at 06:44
  • @MarekR When I saw the segfault happened I expected a stack overflow as well. Note, that's NOT a stack overflow. It's a buffer overflow - very different animal. Still UB of course – sehe Apr 07 '23 at 15:48

1 Answers1

2

This:

canvasRule    = PdCanvasGrammar<Iterator>();

creates a reference to a dangling temporary. Fix it:

template <typename Iterator>
struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), qi::space_type> {
    PdRecordGrammar() : PdRecordGrammar::base_type(recordRule) {
        arrayDataRule = "#A" >> +qi::float_ >> ";";
        recordRule    = canvasRule | arrayDataRule;

        BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(recordRule))
    }

  private:
    qi::rule<Iterator, PdArrayData(), qi::space_type> arrayDataRule;
    PdCanvasGrammar<Iterator>                         canvasRule;
    qi::rule<Iterator, PdRecord(), qi::space_type>    recordRule;
};

Simplify

  • I've simplified some parts before. There's no reason to undo that.
  • You're not using the adapt_adt header (and that's for the best). Why include that?
  • Why use the pre-c++11 versions of the Fusion ADAPT_XXXX macros?
  • Why use phrase_parse when the grammar has no skipper (or overrides it internally). See Boost spirit skipper issues again for context.
  • There's redundant lit(), parentheses and ; in quite a few places.
  • You have using namespace qi but still qualify qi::float_.
  • One grammar declares rules private, the other doesn't
  • The scopes of variables doesn't match the use (in main). This leads to lifetime/variable re-use bugs. Or just code that's hard to maintain.
  • Your input can use const_iterator. It should.

Live Demo

// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

struct PdCanvas { int topX, topY, wX, wY, openOnLoad; std::string name; };
struct PdArrayData { std::vector<float> data; };

BOOST_FUSION_ADAPT_STRUCT(PdCanvas, , topX, topY, wX, wY, name, openOnLoad)
BOOST_FUSION_ADAPT_STRUCT(PdArrayData, data)

using PdRecord = boost::variant<PdArrayData, PdCanvas>;

template <typename Iterator> struct PdCanvasGrammar : qi::grammar<Iterator, PdCanvas()> {
    PdCanvasGrammar() : PdCanvasGrammar::base_type(start) {
        using namespace qi;
        start      = skip(space)[canvasRule >> eoi];
        name       = +('\\' >> space | graph);
        canvasRule = "#N canvas" >> int_ >> int_ >> int_ >> int_ >> name >> int_ >> ';';

        BOOST_SPIRIT_DEBUG_NODES((start)(canvasRule)(name))
    }

  private:
    qi::rule<Iterator, PdCanvas()>                 start;
    qi::rule<Iterator, PdCanvas(), qi::space_type> canvasRule;
    qi::rule<Iterator, std::string()>              name;
};

template <typename Iterator>
struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), qi::space_type> {
    PdRecordGrammar() : PdRecordGrammar::base_type(recordRule) {
        arrayDataRule = "#A" >> +qi::float_ >> ";";
        recordRule    = canvasRule | arrayDataRule;

        BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(recordRule))
    }

  private:
    qi::rule<Iterator, PdArrayData(), qi::space_type> arrayDataRule;
    PdCanvasGrammar<Iterator>                         canvasRule;
    qi::rule<Iterator, PdRecord(), qi::space_type>    recordRule;
};

int main() {
    std::string const input = "#N canvas 0 0 400 300 moo 1; ";
    std::cout << "Input:\n " << quoted(input) << std::endl;

    {
        PdCanvas obj;
        PdCanvasGrammar<std::string::const_iterator> const canvParse;
        std::cout << "Canvas success: " << parse(input.begin(), input.end(), canvParse, obj) << "\n";
    }

    {
        PdRecord                                     obj;
        PdRecordGrammar<std::string::const_iterator> const parser;
        std::cout << "Success: " << phrase_parse(input.begin(), input.end(), parser, qi::space, obj) << "\n";
    }
}

Output:

Input:
 "#N canvas 0 0 400 300 moo 1; "
Canvas success: 1
Success: 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • I'm afraid I just can't quite wrap my head around proper boost spirit, and the obtuse error messages when something goes wrong don't help either. When should I be using "eoi" in a rule (such as in canvas) and when should I be using lit() instead of just the string? The code you've given here works, but then when I try to combine it into a list of records, i.e. (+recordrule) it will no longer parse multiple records (e.g. a string with canvas *and* arraydata). I'm trying to understand the basics so I can build a bigger grammar, but every time I seem to fix one thing it breaks something else. – stix Apr 07 '23 at 17:25
  • 1
    You can always wrap a literal in `lit()`, but it's only required if the context isn't already a parser expression (more technically: "a proto expression template in the domain `qi::domain`"). I was just generally noticing some "noisy" constructs. They aren't /bad/ per se, but are often a sign lack of confidence and in larger quantities do hurt code readability. – sehe Apr 07 '23 at 18:34
  • 1
    " it will no longer parse multiple records" - you can always enable the debug output and you should be able to see where it goes wrong. You can ask another question. I'm here to help, but the Q&A format doesn't really work too well in the comments – sehe Apr 07 '23 at 18:35
  • 1
    the problem with multiple records was the eoi token in the Canvas rule. Removing it made the +records rule work and it successfully parses a string with both a canvas and arraydata record. According to documentation, "eoi" is for end of input, but I'm unclear exactly what that means other than returning success if it hits the end of a parse. When is it appropriate to put eoi in a rule? – stix Apr 07 '23 at 19:48
  • It means that the current iterator equals the end iterator (meaning the input _sequence_ is empty). You put it wherever you want to be sure all input has been consumed. Which was the case in your earlier question where I introduced it to guard against surprising partial parses. – sehe Apr 07 '23 at 19:51