PEG grammars do not handle left-recursion well. In general you have to split out helper rules to write without left-recursion.
In your particular case, the goal production
a ::= ... | A[a] | a1 op a2 | ...
Seems a little off. This would allows foo[bar]
or foo + bar
but not foo + bar[qux]
.
Usually, the choice between array element reference or plain identifier is at a lower level of precedence (often "simple expression").
Here's a tiny elaboration:
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
Now you can parse e.g.:
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
Which prints Live On Coliru
"A[3*4]" -> Parsed: A[3*4]
"A[F[3]]" -> Parsed: A[F[3]]
"A[8 + F[0x31]]" -> Parsed: A[8+F[49]]
"3 * \"foo\"" -> Parsed: 3*"foo"
NOTE I deliberately left efficiency and operator precedence out of the picture for now.
These are talked about in detail in other answers:
And many more
Full Demo Listing
Live On Coliru
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <experimental/iterator>
namespace qi = boost::spirit::qi;
namespace AST {
using Var = std::string;
struct String : std::string {
using std::string::string;
};
using Literal = boost::variant<String, intmax_t, double>;
enum class ArithOp {
addition, subtraction, division, multplication
};
struct IndexExpr;
struct BinOpExpr;
using Expr = boost::variant<
Literal,
Var,
boost::recursive_wrapper<IndexExpr>,
boost::recursive_wrapper<BinOpExpr>
>;
struct IndexExpr {
Expr expr;
std::vector<Expr> indices;
};
struct BinOpExpr {
Expr lhs, rhs;
ArithOp op;
};
std::ostream& operator<<(std::ostream& os, Literal const& lit) {
struct {
std::ostream& os;
void operator()(String const& s) const { os << std::quoted(s); }
void operator()(double d) const { os << d; }
void operator()(intmax_t i) const { os << i; }
} vis {os};
boost::apply_visitor(vis, lit);
return os;
}
std::ostream& operator<<(std::ostream& os, ArithOp const& op) {
switch(op) {
case ArithOp::addition: return os << '+';
case ArithOp::subtraction: return os << '-';
case ArithOp::division: return os << '/';
case ArithOp::multplication: return os << '*';
}
return os << '?';
}
std::ostream& operator<<(std::ostream& os, BinOpExpr const& e) {
return os << e.lhs << e.op << e.rhs;
}
std::ostream& operator<<(std::ostream& os, IndexExpr const& e) {
std::copy(
begin(e.indices),
end(e.indices),
std::experimental::make_ostream_joiner(os << e.expr << '[', ","));
return os << ']';
}
}
BOOST_FUSION_ADAPT_STRUCT(AST::IndexExpr, expr, indices)
BOOST_FUSION_ADAPT_STRUCT(AST::BinOpExpr, lhs, op, rhs)
template <typename Iterator, typename Skipper = qi::space_type>
struct G : qi::grammar<Iterator, AST::Expr()> {
G() : G::base_type(start) {
using namespace qi;
identifier = alpha >> *alnum;
number_literal =
qi::real_parser<double, qi::strict_real_policies<double> >{}
| "0x" >> qi::uint_parser<intmax_t, 16> {}
| qi::int_parser<intmax_t, 10> {}
;
string_literal = '"' >> *('\\' >> char_escape | ~char_('"')) >> '"';
literal = number_literal | string_literal; // TODO exapnd?
expr_arr = identifier >> '[' >> (expr_a % ',') >> ']';
simple_expression = literal | expr_arr | identifier;
expr_binary_arith = simple_expression >> op_binary_arith >> expr_a;
expr_a = expr_binary_arith | simple_expression;
start = skip(space) [expr_a];
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expr_a)(expr_binary_arith)(simple_expression)(expr_a)
(literal)(number_literal)(string_literal)
(identifier))
}
private:
struct escape_sym : qi::symbols<char, char> {
escape_sym() {
this->add
("b", '\b')
("f", '\f')
("r", '\r')
("n", '\n')
("t", '\t')
("\\", '\\')
;
}
} char_escape;
struct op_binary_arith_sym : qi::symbols<char, AST::ArithOp> {
op_binary_arith_sym() {
this->add
("+", AST::ArithOp::addition)
("-", AST::ArithOp::subtraction)
("/", AST::ArithOp::division)
("*", AST::ArithOp::multplication)
;
}
} op_binary_arith;
qi::rule<Iterator, AST::Expr()> start;
qi::rule<Iterator, AST::IndexExpr(), Skipper> expr_arr;
qi::rule<Iterator, AST::BinOpExpr(), Skipper> expr_binary_arith;
qi::rule<Iterator, AST::Expr(), Skipper> simple_expression, expr_a;
// implicit lexemes
qi::rule<Iterator, AST::Literal()> literal, string_literal, number_literal;
qi::rule<Iterator, AST::Var()> identifier;
};
int main() {
using It = std::string::const_iterator;
G<It> const g;
for (std::string const& input : {
"A[3*4]",
"A[F[3]]",
"A[8 + F[0x31]]",
"3 * \"foo\"",
})
{
std::cout << std::quoted(input) << " -> ";
It f=begin(input), l=end(input);
AST::Expr e;
if (parse(f,l,g,e)) {
std::cout << "Parsed: " << e << "\n";
} else {
std::cout << "Failed\n";
}
if (f!=l) {
std::cout << "Remaining: " << std::quoted(std::string(f,l)) << "\b";
}
}
}