2

My ultimate goal is to write an HLSL (shading language) parser. My first experience with parsing has been following Bob Nystrom's "Crafting Interpreters".

The issue I am currently facing is that I am trying to parse a 'chained member access' sequence (i.e. multiple 'dot' operators), for example:

first.Second.third

Obviously I could parse that into a list/sequence (e.g. a vector of strings), but I am trying to stick to the AST shown in the Crafting Interpreters book by having nested 'Get' nodes.

I am trying to parse this nested Get sequence so that I can eventually put it into a Set AST node. But I thought it would be best to get the 'Get' part working first, before building on top of that.

https://craftinginterpreters.com/classes.html#set-expressions

Here's my minimal compiling program that tries to do that....


#include "boost/variant.hpp"
#include <boost/config/warning_disable.hpp>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/annotate_on_success.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>

#include <iostream>
#include <string>
#include <tuple>
#include <variant>

namespace hlsl {
namespace ast {

struct Get;
struct ExprVoidType {};

struct Variable {
    Variable(std::string name) : name(std::move(name)) {
    }
    Variable() = default;
    std::string name;
};

using Expr =
    boost::spirit::x3::variant<ExprVoidType,
                               boost::spirit::x3::forward_ast<Get>, Variable>;

struct Get {
    Get(Expr& object, std::string name) : object_{object}, name_{name} {
    }
    Get() = default;
    Expr object_;
    std::string name_;
};
} // namespace ast
} // namespace hlsl

/// Visitor that dumps an expression tree to std::cout, one item per line.
/// All call operators are const because the visitor carries no state.
struct visitor {

    using result_type = void;

    /// Prints a bare name. Taken by const reference to avoid copying the
    /// string on every call.
    void operator()(const std::string& name) const {
        std::cout << name << "\n";
    }

    /// Prints a "get expr" marker, then the object expression (recursively),
    /// then the accessed member name.
    void operator()(const hlsl::ast::Get& get) const {
        std::cout << "get expr\n";

        get.object_.apply_visitor(*this);
        std::cout << get.name_ << "\n";
    }

    /// Prints the variable's name.
    void operator()(const hlsl::ast::Variable& var) const {
        std::cout << var.name << "\n";
    }

    /// The placeholder alternative prints nothing.
    void operator()(const hlsl::ast::ExprVoidType&) const {
    }
};

BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, name_)

namespace x3 = boost::spirit::x3;
namespace ascii = boost::spirit::x3::ascii;

using ascii::char_;
using ascii::space;
using x3::alnum;
using x3::alpha;
using x3::double_;
using x3::int_;
using x3::lexeme;
using x3::lit;

/// Mixin providing an `on_error` hook: it forwards the failure position and an
/// "Expecting ..." message to the handler stored in the parse context under
/// x3::error_handler_tag, then reports the failure.
struct error_handler {
    template <typename Iterator, typename Exception, typename Context>
    x3::error_handler_result on_error(Iterator& /*first*/,
                                      Iterator const& /*last*/,
                                      Exception const& x,
                                      Context const& context) {
        // The context holds a reference wrapper; unwrap it to reach the handler.
        auto& reporter = x3::get<x3::error_handler_tag>(context).get();
        reporter(x.where(), "Error! Expecting: " + x.which() + " here:");
        return x3::error_handler_result::fail;
    }
};

/////////////////////////////////////////
// RULES
///////////////////////////////////////////

// identifier: one or more alphanumeric characters, collected into a string.
// NOTE(review): not wrapped in x3::lexeme; main() parses with the `space`
// skipper, so whitespace between the characters is presumably skipped too --
// confirm.
x3::rule<class identifier_class, std::string> const identifier = "identifier";
auto const identifier_def = +alnum;
BOOST_SPIRIT_DEFINE(identifier);

// expression: declared here, defined further down as `get | variable`.
x3::rule<class expression_class, hlsl::ast::Expr> const expression =
    "expression";

// variable yields an ast::Variable, get yields an ast::Get.
x3::rule<class variable_class, hlsl::ast::Variable> const variable = "variable";
x3::rule<class get_class, hlsl::ast::Get> const get = "get";

// A variable is just an identifier.
auto const variable_def = identifier;
BOOST_SPIRIT_DEFINE(variable);

// An expression is a member access or, failing that, a plain variable.
auto const expression_def = get | variable;
BOOST_SPIRIT_DEFINE(expression);
///////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////
// get

// get: an object, then exactly one '.' and one identifier.
// NOTE(review): the `expression` alternative leads straight back into `get`
// (expression_def = get | variable) without consuming any input, i.e. the rule
// is left-recursive on that branch. Because `variable` is tried first, the
// input "first.Second.third" presumably matches only "first.Second" and leaves
// ".third" unparsed -- confirm.
auto const get_def = (variable | expression) >> '.' >> identifier;
BOOST_SPIRIT_DEFINE(get);
/////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////
// Rule ID of the start rule; forward-declared so the rule can name it before
// the definition below.
struct program_class;

x3::rule<program_class, hlsl::ast::Expr> const program = "program";

// The whole program is a single `get` expression.
auto const program_def = get;

BOOST_SPIRIT_DEFINE(program);

// Deriving from error_handler gives the rule ID its on_error member.
struct program_class : error_handler {};
// struct program_class;

/////////////////////////////////////////////////////////

// } // namespace parser
// } // namespace client

////////////////////////////////////////////////////////////////////////////
//  Main program
////////////////////////////////////////////////////////////////////////////

/// Parses a fixed sample input with the `program` rule and dumps the resulting
/// AST, or reports where parsing stopped.
int main() {

    using boost::spirit::x3::error_handler_tag;
    using boost::spirit::x3::with;
    using iterator_type = std::string::const_iterator;
    using error_handler_type = boost::spirit::x3::error_handler<iterator_type>;

    // input string
    std::string input = "first.Second.third";

    hlsl::ast::Expr fs;
    auto iter = input.begin();
    auto const end = input.end();

    // Our error handler
    error_handler_type error_handler(iter, end, std::cerr);
    auto const parser =
        // we pass our error handler to the parser so we can access
        // it later in our on_error and on_sucess handlers
        with<error_handler_tag>(std::ref(error_handler))[program];

    // NOTE(review): a true result only means a prefix of the input matched;
    // `iter` may still be short of `end` -- confirm whether a full match is
    // required here.
    bool const r = phrase_parse(iter, end, parser, space, fs);

    if (r) {
        std::cout << "Parse Suceeded\n\n";
        visitor v;
        fs.apply_visitor(v);
    } else {
        std::cout << "Sorry :(\n\n";
        // Show the character where parsing stopped. The iterator may already
        // be at the end of the input, in which case dereferencing it would be
        // undefined behaviour.
        if (iter != end) {
            std::cout << *iter;
        }
    }
    std::cout << "Bye... :-) \n\n";
    return 0;
}

What I want is something like this

Get {
  object_: Get {
            object_: Variable {
                      name : "first"
                    },
            name_: second
          },
  name_: third     
}

Is this kind of thing even possible using x3 and the way it constructs parsers from grammar?

1 Answers1

1

Sure. Your grammar parses left-to-right, and that's also how you want to build your AST (outside-in, not inside-out).

I'd rephrase the whole thing:

expression = variable >> *('.' >> identifier);

Now you'll have to massage the attribute propagation as each . member access wraps the previous expression in another Get{expression, name} instance:

// Rule declarations: each rule has a unique ID type, the attribute type it
// produces, and a name string.
x3::rule<struct identifier_, std::string>   const identifier{"identifier"};
x3::rule<struct variable_,   ast::Variable> const variable{"variable"};
x3::rule<struct expression_, ast::Expr>     const expression{"expression"};
x3::rule<struct program_,    ast::Expr>     const program{"program"};

// An identifier is one alphabetic character followed by any number of
// alphanumerics, inside a lexeme directive.
auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
// A variable is just an identifier.
auto variable_def   = identifier;

Now let's use two semantic actions to propagate the expression parts:

// Stores the attribute of the parser the action is attached to as the rule's
// value, converted to an ast::Expr.
auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
// Wraps the rule's current value in a new ast::Get, with the attached parser's
// attribute as the second member, and stores that as the new value.
auto as_get  = [](auto& ctx) {
    _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
};

// A variable seeds the value; every following ".identifier" wraps the value
// built so far in another Get.
auto expression_def = variable[as_expr] >> *('.' >> identifier[as_get]);

Let's also bake the skipper into the grammar while we're at it:

auto program_def    = x3::skip(x3::space)[expression];

Live Demo

With a lot of simplifications, e.g. for the AST & visitor:

Live On Coliru

#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/utility/error_reporting.hpp>
#include <iomanip>
#include <iostream>

namespace x3 = boost::spirit::x3;

namespace hlsl {
    namespace ast {
        struct Get;

        // Empty placeholder; first alternative of Expr.
        struct Void {};

        // A plain named variable.
        struct Variable {
            std::string name;
        };

        // Any expression node. Get is incomplete at this point, hence the
        // forward_ast wrapper.
        using Expr = x3::variant<Void, x3::forward_ast<Get>, Variable>;

        // Member access: object_ . property_
        struct Get {
            Expr        object_;
            std::string property_;
        };
    } // namespace ast

    // Visitor that writes a one-line rendering of an expression to _os.
    struct printer {
        using result_type = void;

        std::ostream& _os;

        void operator()(ast::Void const&) const {
            _os << "void{}";
        }

        void operator()(ast::Variable const& variable) const {
            _os << "var{" << std::quoted(variable.name) << "}";
        }

        void operator()(ast::Get const& node) const {
            _os << "get { object_:";
            // Recurse into the object expression with this same printer.
            node.object_.apply_visitor(*this);
            _os << ", property_:" << std::quoted(node.property_) << " }";
        }
    };

} // namespace hlsl

BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)

namespace hlsl::parser {
    // Tag under which the error handler is stored in, and fetched from, the
    // parse context.
    struct eh_tag;

    // Provides on_error: reports the failure position and an "Expecting ..."
    // message through the handler found under eh_tag, then returns `fail`.
    struct error_handler {
        template <typename It, typename Exc, typename Ctx>
        auto on_error(It&, It, Exc const& x, Ctx const& context) const {
            x3::get<eh_tag>(context)( //
                x.where(), "Error! Expecting: " + x.which() + " here:");

            return x3::error_handler_result::fail;
        }
    };

    // Rule ID of `program`; inherits on_error from error_handler.
    struct program_ : error_handler {};

    // Rule declarations: ID type, produced attribute type, and name.
    x3::rule<struct identifier_, std::string>   const identifier{"identifier"};
    x3::rule<struct variable_,   ast::Variable> const variable{"variable"};
    x3::rule<struct expression_, ast::Expr>     const expression{"expression"};
    x3::rule<struct program_,    ast::Expr>     const program{"program"};

    // Stores the attached parser's attribute as the rule's value, converted to
    // an ast::Expr.
    auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
    // Wraps the rule's current value in a new ast::Get (the attached parser's
    // attribute becomes property_) and stores that as the new value.
    auto as_get  = [](auto& ctx) {
        _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)};
    };

    // identifier: one alphabetic character, then any number of alphanumerics,
    //             inside a lexeme directive.
    // variable:   just an identifier.
    // expression: a variable, then zero or more ".identifier" suffixes, each
    //             wrapping the value built so far in another Get.
    // program:    an expression parsed with x3::space as skipper.
    auto identifier_def = x3::lexeme[x3::alpha >> *x3::alnum];
    auto variable_def   = identifier;
    auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
    auto program_def    = x3::skip(x3::space)[expression];

    BOOST_SPIRIT_DEFINE(variable, expression, identifier, program);

} // namespace hlsl::parser

int main() {
    using namespace hlsl;

    for (std::string const input :
         {
             "first",
             "first.second",
             "first.Second.third",
         }) //
    {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        // Our error handler
        auto const p = x3::with<parser::eh_tag>(
            x3::error_handler{f, l, std::cerr})[hlsl::parser::program];

        if (hlsl::ast::Expr fs; parse(f, l, p, fs)) {
            fs.apply_visitor(hlsl::printer{std::cout << "Parsed: "});
            std::cout << "\n";
        } else {
            std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
        }
    }
}

Prints

===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get { object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get { object_:get { object_:var{"first"}, property_:"Second" }, property_:"third" }

More Simplifications

In the current scenario none of the rules are recursive, so you don't need the _DEFINE magic. Assuming you need recursion in the expression later, you could at least remove some redundancy:

namespace hlsl::parser {
    // The only rule that is declared separately and defined via
    // BOOST_SPIRIT_DEFINE below.
    x3::rule<struct expression_, ast::Expr> const expression{"expression"};

    // Stores the attached parser's attribute as the rule's value, converted to
    // an ast::Expr.
    auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
    // Wraps the rule's current value in a new ast::Get and stores that as the
    // new value.
    auto as_get  = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };

    // Rule declared and defined in a single expression: one alphabetic
    // character, then any number of alphanumerics, inside a lexeme directive.
    auto identifier 
        = x3::rule<void, std::string>{"identifier"} 
        = x3::lexeme[x3::alpha >> *x3::alnum];

    // variable:   just an identifier, producing an ast::Variable.
    // expression: a variable, then zero or more ".identifier" suffixes, each
    //             wrapping the value built so far in another Get.
    // program:    an expression parsed with x3::space as skipper.
    auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
    auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
    auto program        = x3::skip(x3::space)[expression];

    BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser

Note also that the lexeme is important to suppress skipping (Boost spirit skipper issues)

See it Live On Coliru as well.

Oh and for bonus, a version without x3::variant or visitation:

Live On Coliru

#include <boost/fusion/adapted.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
#include <iostream>

namespace x3 = boost::spirit::x3;

namespace hlsl::ast {
    struct Get;

    // Empty placeholder; first alternative of Expr.
    struct Void {};

    // A plain named variable.
    struct Variable {
        std::string name;
    };

    // Any expression node. Get is incomplete at this point, hence the
    // recursive_wrapper.
    using Expr = boost::variant<Void, boost::recursive_wrapper<Get>, Variable>;

    // Member access: object_ . property_
    struct Get {
        Expr        object_;
        std::string property_;
    };

    // Stream inserters for each node type.
    static inline std::ostream& operator<<(std::ostream& os, Void) {
        os << "void()";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Variable const& v) {
        os << "var{" << std::quoted(v.name) << "}";
        return os;
    }

    static inline std::ostream& operator<<(std::ostream& os, Get const& g) {
        os << "get{ object_:" << g.object_;
        os << ", property_:" << std::quoted(g.property_) << " }";
        return os;
    }
} // namespace hlsl::ast

BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Variable, name)
BOOST_FUSION_ADAPT_STRUCT(hlsl::ast::Get, object_, property_)

namespace hlsl::parser {
    // The only rule that is declared separately and defined via
    // BOOST_SPIRIT_DEFINE below.
    x3::rule<struct expression_, ast::Expr> const expression{"expression"};

    // Stores the attached parser's attribute as the rule's value, converted to
    // an ast::Expr.
    auto as_expr = [](auto& ctx) { _val(ctx) = ast::Expr(std::move(_attr(ctx))); };
    // Wraps the rule's current value in a new ast::Get and stores that as the
    // new value.
    auto as_get  = [](auto& ctx) { _val(ctx) = ast::Get{std::move(_val(ctx)), _attr(ctx)}; };

    // Rule declared and defined in a single expression: one alphabetic
    // character, then any number of alphanumerics, inside a lexeme directive.
    auto identifier 
        = x3::rule<void, std::string>{"identifier"} 
        = x3::lexeme[x3::alpha >> *x3::alnum];

    // variable:   just an identifier, producing an ast::Variable.
    // expression: a variable, then zero or more ".identifier" suffixes, each
    //             wrapping the value built so far in another Get.
    // program:    an expression parsed with x3::space as skipper.
    auto variable = x3::rule<void, ast::Variable>{"variable"} = identifier;
    auto expression_def = variable[as_expr] >> *('.' >> identifier)[as_get];
    auto program        = x3::skip(x3::space)[expression];

    BOOST_SPIRIT_DEFINE(expression)
} // namespace hlsl::parser

int main() {
    using namespace hlsl;

    for (std::string const input :
         {
             "first",
             "first.second",
             "first.Second.third",
         }) //
    {
        std::cout << "===== " << quoted(input) << "\n";
        auto f = input.begin(), l = input.end();

        if (ast::Expr fs; parse(f, l, parser::program, fs)) {
            std::cout << "Parsed: " << fs << "\n";
        } else {
            std::cout << "Parse failed at " << quoted(std::string_view(f, l)) << "\n";
        }
    }
}

Prints just the same:

===== "first"
Parsed: var{"first"}
===== "first.second"
Parsed: get{ object_:var{"first"}, property_:"second" }
===== "first.Second.third"
Parsed: get{ object_:get{ object_:var{"first"}, property_:"Second" }, property_:"third" }

That's >100 lines of code removed. With no functionality sacrificed.

sehe
  • 374,641
  • 47
  • 450
  • 633
  • 1
    Wow thanks @sehe :) I had been reading a lot of your answers on here and hoped that you would respond. you have done in spades. I'll now dive deeply into your answer and learn a few things. Much appreciated :) – Daniel Dokipen Elliott Nov 28 '22 at 04:13