1

I need to convert infix notations like the one below to n-ary prefix notation with Boost::Spirit, but I am failing at building on the answers from https://stackoverflow.com/a/8707598/1816477 et al.

This is what I am trying to parse:

not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')

and this LISP-styled format is what I am trying to provide as output (do not mind the indentation):

(xor (and (= pqr 'v') (= xyz 'd'))
     (and (= abc 's')
          (not (or (= xyz 'a')
                   (= xyz 'b')
                   (= xyz 'c')))))

So, the terms I try to parse consist of prefixed (not <expression>) and infix expressions (<expression> and <expression> and ... etc.), i.e.: assignments, negations and n-ary ands, ors, xors etc., implying operator precedence (or < xor < and < assignment < negation).

What I am failing at is getting the grammar right. Outputting to a suitable boost::variant representing the parsed boolean expression I think I am able to accomplish. I am thinking of an output structure like this one:

struct prefixExpr;
struct infixExpr;

typedef boost::variant<
    std::string,    // identifiers, values etc.
    boost::recursive_wrapper<prefixExpr>,   // e.g. negation
    boost::recursive_wrapper<infixExpr>     // assignment, and, or, xor etc.
> expression;

struct prefixExpr {
    std::string op;    // currently only "not"
    expression expr;
};
BOOST_FUSION_ADAPT_STRUCT(prefixExpr, op, expr)

struct infixExpr {
    std::string op;    // "and", "or", "xor", "="
    std::vector<expression> exprs;
};
BOOST_FUSION_ADAPT_STRUCT(infixExpr, op, exprs)

What do I need to do to be able to parse expressions like the one mentioned above and convert them to a prefix notation?

I am using the boost 1.67.0 (the latest at the time of writing) and Visual Studio 15.7.3 (also the latest at the time of writing).

Jinxed
  • 356
  • 2
  • 16
  • What do you suggest is the evaluation order between `and` and `xor` in that sample? Will you do n-ary `xor`? How? What are the rules regarding to parenthesizing sub-expressions? If parenthesizing is optional we need to have evaluation order specified. (So, associativity and precedence) – sehe Jun 13 '18 at 20:55
  • I tried adapting from your example, but failed. xor is defined for multiple boolean variables, so: yes, n-ary, too. The precedence in ascending order would be: or, xor, and, negation. The only thing I would need is a valid grammar, not the complete boost-shebang. This I can do myself, but I seem to fail on the grammar. – Jinxed Jun 20 '18 at 14:23

1 Answers1

3

The code is not perfect but should be simple to understand:

#include <boost/variant.hpp>
#include <boost/spirit/home/x3.hpp>
#include <vector>
#include <string>
#include <iostream>


struct id : std::string {};
struct value : std::string {};
struct nary_expr;

using expr = boost::variant<
    id, value,
    boost::recursive_wrapper<nary_expr>
>;


struct nary_expr
{
    std::string op;
    std::vector<expr> exprs;
};


namespace x3 = boost::spirit::x3;

auto compose_nary_expr = [](auto& ctx)
{
    //auto&& [left, tail] = x3::_attr(ctx);
    auto&& left = boost::fusion::at_c<0>(x3::_attr(ctx));
    auto&& tail = boost::fusion::at_c<1>(x3::_attr(ctx));

    if (tail.size() == 0) {
        x3::_val(ctx) = left;
        return;
    }

    // left associativity
    auto op = boost::fusion::at_c<0>(tail[0]);
    std::vector<expr> exprs = { left, boost::fusion::at_c<1>(tail[0]) };
    for (std::size_t i = 1; i < tail.size(); ++i) {
        // same priority but different operator
        auto&& next_op = boost::fusion::at_c<0>(tail[i]);
        if (op != next_op) {
            exprs = std::vector<expr>{ nary_expr{ op, std::move(exprs) } };
            op = next_op;
        }
        exprs.push_back(boost::fusion::at_c<1>(tail[i]));
    }
    x3::_val(ctx) = nary_expr{ op, std::move(exprs) };
};

x3::rule<class prec4_expr_rule, expr> const prec4_expr("prec4_expr");
x3::rule<class prec3_expr_rule, expr> const prec3_expr("prec3_expr");
x3::rule<class prec2_expr_rule, expr> const prec2_expr("prec2_expr");
x3::rule<class prec1_expr_rule, expr> const prec1_expr("prec1_expr");
x3::rule<class prec0_expr_rule, expr> const prec0_expr("prec0_expr");

auto const prec4_expr_def = prec4_expr = (
    prec3_expr
    >> *(   (x3::string("or") > prec3_expr)
        )
    )[compose_nary_expr];

auto const prec3_expr_def = prec3_expr = (
    prec2_expr
    >> *(   (x3::string("xor") > prec2_expr)
        )
    )[compose_nary_expr];

auto const prec2_expr_def = prec2_expr = (
    prec1_expr
    >> *(   (x3::string("and") > prec1_expr)
        )
    )[compose_nary_expr];


auto compose_binary_expr = [](auto& ctx)
{
    auto&& rhs = boost::fusion::at_c<0>(x3::_attr(ctx));
    auto&& tail = boost::fusion::at_c<1>(x3::_attr(ctx));
    if (tail.size() > 0) {
        auto&& op = boost::fusion::at_c<0>(tail[0]);
        auto&& lhs = boost::fusion::at_c<1>(tail[0]);
        x3::_val(ctx) = nary_expr{ op, { rhs, lhs } };
    }
    else {
        x3::_val(ctx) = rhs;
    }
};


// should use optional, but something wrong with spirit
auto const prec1_expr_def = prec1_expr = (
    prec0_expr >> *(x3::string("=") > prec0_expr)
    )[compose_binary_expr];



x3::rule<class not_expr_rule, expr> const not_expr("not_expr");

auto compose_unary_expr = [](auto& ctx)
{
    //auto&& [op, expr] = x3::_attr(ctx);
    auto&& op = boost::fusion::at_c<0>(x3::_attr(ctx));
    auto&& expr = boost::fusion::at_c<1>(x3::_attr(ctx));
    x3::_val(ctx) = nary_expr{ op, { expr } };
};

auto const not_expr_def = not_expr = (x3::string("not") > prec0_expr)[compose_unary_expr];
auto const id_term = x3::rule<class id_r, id>{} = x3::lexeme[x3::alpha >> *x3::alnum];
auto const value_term = x3::rule<class value_r, value>{} = x3::lexeme["'" > +~x3::char_('\'') >> "'"];

auto const prec0_expr_def =
      value_term
    | ( '(' > prec4_expr >> ')' )
    | not_expr
    | id_term
    ;


BOOST_SPIRIT_DEFINE(
    prec0_expr
  , prec1_expr
  , prec2_expr
  , prec3_expr
  , prec4_expr
  , not_expr
);


struct indent
{
    std::size_t cur;
};

indent operator+(indent lhs, std::size_t rhs)
{
    return { lhs.cur + rhs };
}

std::ostream& operator<<(std::ostream& os, indent const& v)
{
    for (unsigned i = 0; i < v.cur; ++i) os << ' ';
    return os;
}

struct is_simple
{
    template <typename T>
    bool operator()(T const&) const
    {
        return std::is_same<T, id>::value || std::is_same<T, value>::value;
    }
};

struct printer
{
    indent indent_;

    void operator()(id const& v)
    {
        std::cout << v;
    }

    void operator()(value const& v)
    {
        std::cout << '\'' << v << '\'';
    }

    void operator()(nary_expr const& v)
    {
        std::cout << '(' << v.op << ' ';
        printer p{ indent_ + 2 + v.op.size() };
        boost::apply_visitor(p, v.exprs[0]);
        for (std::size_t i = 1; i < v.exprs.size(); ++i) {
            if (boost::apply_visitor(is_simple{}, v.exprs[i])) {
                std::cout << ' ';
            }
            else {
                std::cout << '\n' << p.indent_;
            }
            boost::apply_visitor(p, v.exprs[i]);
        }
        std::cout << ')';
    }
};

int main()
{
    std::string s = "not (xyz='a' or xyz='b' or xyz='c') and abc='s' xor (pqr ='v' and xyz='d')";
    expr expr;
    auto iter = s.cbegin();
    if (phrase_parse(iter, s.cend(), prec4_expr_def, x3::space, expr) && iter == s.cend()) {
        boost::apply_visitor(printer{}, expr);
    }

    return 0;
}

It prints:

(xor (and (not (or (= xyz 'a')
                   (= xyz 'b')
                   (= xyz 'c')))
          (= abc 's'))
     (and (= pqr 'v')
          (= xyz 'd')))
Nikita Kniazev
  • 3,728
  • 2
  • 16
  • 30