2

I am trying to write a language parser with Boost::Spirit. I read the tutorial and tried the following code to parse a function with the syntax: def myfunc(arg1 type1, arg2 type2, ...) return_type:

AST:

// AST types that the parsers synthesize as attributes.
namespace ast {

// Type names the language knows; the parser's symbol table maps
// "bool", "int32" and "float32" onto these enumerators.
enum Type { BOOL, INT32, FLOAT32 };

// A bare name, stored as the matched characters.
using Identifier = std::string;

// A name followed by its type, e.g. the result of parsing "foo int32".
using TypedIdentifier = std::tuple<Identifier, Type>;

// The comma-separated parameters between the parentheses.
using ArgList = std::vector<TypedIdentifier>;

// (function name, parameters, return type).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;
}

Parser:

// One qi::grammar per production. This is the version that segfaults:
// TypedIdentifier and FunctionDef build their start rules from
// default-constructed temporaries of the other parsers (see the notes below).
namespace parser {

// Parses an identifier into ast::Identifier: one character from the first set
// followed by zero or more characters from the second set.
// NOTE(review): Qi char-set strings are not regex classes, so the '[' and ']'
// inside the strings look like they become members of the set -- confirm
// against the char_ documentation.
struct Identifier
    : qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> {
  Identifier() : Identifier::base_type(start) {
    start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
  }
  // Declared with the ascii::space skipper, like every other rule here.
  qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start;
};

// Symbol table mapping the type keywords to ast::Type values.
struct Type : qi::symbols<char, ast::Type> {
  Type() {
    add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL);
  }
};

// Parses "<identifier> <type>" into ast::TypedIdentifier.
struct TypedIdentifier
    : qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> {
  TypedIdentifier() : TypedIdentifier::base_type(start) {
    // Identifier() and Type() are temporaries that are destroyed at the end of
    // this statement, while `start` is used later during parsing. The answer
    // identifies references to such temporaries as the cause of the segfault.
    start = Identifier() >> Type();
  }
  qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start;
};

// Parses "def <identifier> ( <typed identifier> , ... ) <type> :" into
// ast::FunctionDef. The argument list uses `%`, so at least one argument is
// required.
struct FunctionDef
    : qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> {
  FunctionDef() : FunctionDef::base_type(start) {
    // Same pattern as in TypedIdentifier: every sub-parser on the right-hand
    // side is a temporary that does not outlive this statement.
    start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >>
            Type() >> ":";
  }
  qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start;
};
}

Then I get a segfault when trying to parse a simple code snippet. It shows up when parsing a function definition, but after some debugging I found that it already happens when parsing just a typed identifier.

// Minimal reproducer: parses "foo int32" with a temporary
// parser::TypedIdentifier grammar, skipping ASCII whitespace, and prints
// whether phrase_parse reported success.
int main() {
  string foo("foo int32");
  // phrase_parse takes the start iterator by reference, so it needs a named
  // variable rather than foo.begin() directly.
  auto begin = foo.begin();
  auto end = foo.end();
  // Receives the parsed (identifier, type) pair.
  ast::TypedIdentifier id;
  bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(),
                                 ascii::space, id);
  cout << "Parse " << (result ? "successful " : "failed ") << endl;
  return 0;
}

I tested the Identifier and Type parsers and they work fine on their own. I also tried defining global grammars instead of instantiating new ones but I also get the segfault. What am I doing wrong here?

ElefEnt
  • 2,027
  • 1
  • 16
  • 20

1 Answer

1

The linked answer¹ indeed shows what's wrong (there are references to temporaries in the grammar rules).

I suggest that you don't need to create grammar<> instances for every single production. Instead, it's way more efficient (and elegant) to group them as rules into a grammar:

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

namespace ast {

    // Types a parameter or a return value can be declared with.
    enum Type { BOOL, INT32, FLOAT32 };

    // Attribute types produced by the grammar, from the smallest unit up to a
    // complete function signature.
    using Identifier      = std::string;
    using TypedIdentifier = std::tuple<Identifier, Type>;
    using ArgList         = std::vector<TypedIdentifier>;
    using FunctionDef     = std::tuple<Identifier, ArgList, Type>;

    // Streams the enumerator's name; any value outside the three enumerators
    // is printed as "?".
    std::ostream& operator<<(std::ostream& os, Type v) {
        char const* label = "?";
        switch (v) {
            case BOOL:    label = "BOOL";    break;
            case INT32:   label = "INT32";   break;
            case FLOAT32: label = "FLOAT32"; break;
        }
        return os << label;
    }

    // Streams a std::tuple by viewing it as a Fusion vector and delegating to
    // Fusion's stream output.
    template <typename... Ts> std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) {
        os << boost::fusion::as_vector(v);
        return os;
    }

    // Streams a std::vector as "{e1 e2 ... }"; every element, including the
    // last one, is followed by a single space.
    template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
        os << "{";
        for (auto it = v.begin(); it != v.end(); ++it) {
            os << *it << " ";
        }
        os << "}";
        return os;
    }
}

namespace parser {

    /// Grammar for one function signature:
    ///
    ///     def <identifier> ( <identifier> <type> , ... ) <type> :
    ///
    /// The synthesized attribute is an ast::FunctionDef. Whitespace between
    /// tokens is skipped with ascii::space. All productions are member rules
    /// of this single grammar, so every parser a rule refers to lives as long
    /// as the grammar object itself.
    ///
    /// @tparam Iterator  iterator type of the input being parsed.
    template <typename Iterator>
    struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> {
        MyGrammarImpl() : MyGrammarImpl::base_type(functionDef) 
        {
            // Qi char-set strings are written WITHOUT enclosing brackets:
            // with "[a-zA-Z_]" the '[' and ']' would themselves be accepted as
            // identifier characters.
            identifier      = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
            typedIdentifier = identifier >> type;
            // `%` is the list operator: one or more typed identifiers
            // separated by commas.
            functionDef     = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":";
            // Thin wrapper around the symbol table so that type matches show
            // up as a named node in the debug output.
            type            = type_;

            BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef))
        }
      private:
        qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier;
        qi::rule<Iterator, ast::FunctionDef(),     ascii::space_type> functionDef;
        qi::rule<Iterator, ast::Type(),            ascii::space_type> type;

        // lexemes: declared without a skipper, so no whitespace is skipped
        // between the characters of an identifier
        qi::rule<Iterator, ast::Identifier()> identifier;

        // Symbol table mapping the type keywords to ast::Type values.
        struct Type : qi::symbols<char, ast::Type> {
            Type() {
                add("int32", ast::INT32)
                   ("float32", ast::FLOAT32)
                   ("bool", ast::BOOL)
                   ;
            }
        };

        Type type_;
    };

    // Instantiation used by main(), which parses a const std::string.
    using MyGrammar = MyGrammarImpl<std::string::const_iterator>;
}

// Parses one sample function signature, prints whether the parse succeeded
// and, on success, the resulting AST. Any input the parser did not consume is
// reported as well.
int main() {
    std::string const foo("def bar(foo int32 ) bool:");

    // phrase_parse advances `begin` past the input it consumed.
    auto begin = foo.begin();
    auto end = foo.end();

    ast::FunctionDef def;

    bool result = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def);

    std::cout << "Parse " << (result ? "successful " : "failed ") << std::endl;
    if (result)
        std::cout << def << "\n";

    // phrase_parse also returns true when only a prefix of the input matched,
    // so leftover input has to be checked for explicitly.
    if (begin != end)
        std::cout << "Remaining unparsed input: '" << std::string(begin, end) << "'\n";
}

Prints:

Parse successful 
(bar {(foo INT32) } BOOL)

With debug info:

<functionDef>
<try>def bar(foo int32 ) </try>
<identifier>
    <try>bar(foo int32 ) bool</try>
    <success>(foo int32 ) bool:</success>
    <attributes>[[b, a, r]]</attributes>
</identifier>
<typedIdentifier>
    <try>foo int32 ) bool:</try>
    <identifier>
    <try>foo int32 ) bool:</try>
    <success> int32 ) bool:</success>
    <attributes>[[f, o, o]]</attributes>
    </identifier>
    <type>
    <try> int32 ) bool:</try>
    <success> ) bool:</success>
    <attributes>[INT32]</attributes>
    </type>
    <success> ) bool:</success>
    <attributes>[[[f, o, o], INT32]]</attributes>
</typedIdentifier>
<type>
    <try> bool:</try>
    <success>:</success>
    <attributes>[BOOL]</attributes>
</type>
<success></success>
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes>
</functionDef>

¹ Internal Boost::Spirit code segfaults when parsing a composite grammar

Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Thanks a lot! I have a few questions about your changes: (1) Why is a type rule needed? I thought type_ was good enough to parse. (2) Why is there no ascii::space_type for the identifier rule? (3) Right now I store the results in STL containers and I don't even know what they actually contain. Is there a way to use custom structs/classes as attributes? What is the idiomatic way to parse to functional AST objects? – ElefEnt Jan 25 '16 at 16:06
  • I added the type rule for debugging. The identifier must be a lexeme (that is why its rule is declared without a skipper). The rest is probably best asked separately so we know what you mean and it gets due attention. – sehe Jan 25 '16 at 17:27
  • I was looking at some examples and I found one where a temporary variable uint_ is used to define the grammar: http://www.boost.org/doc/libs/1_60_0/libs/spirit/example/qi/compiler_tutorial/calc2.cpp So to be consistent we should maybe file a bug for that segfault? :) – ElefEnt Jan 27 '16 at 17:56