I've been learning C++ and Boost.Spirit lately to help my team parse complex structures into an AST and then into XML. Blessed with a lot of help from this community (mostly from sehe), things have been moving along pretty well. With my limited knowledge of C++ and Boost, I'm stuck again... :( What's new, right?
I'm trying to parse the following structure:
If (Var1 == "Test" && Var2 <= 10 && Var3 == "Done")
Verify Word 32 Objective;
If ((Var3 == "A" || Var4 == "B") && Var5 > 0)
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue"
End If;
Else
Assign VarName "Value2"
EndIf;
Notes
- An If statement follows one of these formats: If-ElseIf-Else-EndIf;
- or If-Else-EndIf; or If-EndIf;
- Inside the If and Else (or ElseIf) blocks there can be for-loops, while-loops and/or simple statements like those in the example above.
The expected XML output for the code block above looks like this:
<if>
<bioperator>
&&
<bioperator>
&&
<bioperator>
==
<variant>Var1</variant>
<literal>"Test"</literal>
</bioperator>
<bioperator>
<=
<variant>Var2</variant>
<num>10</num>
</bioperator>
</bioperator>
<bioperator>
==
<variant>Var3</variant>
<literal>"Done"</literal>
</bioperator>
</bioperator>
<then>
<verify>
<word>Word</word>
<num>32</num>
<obj>Objective</obj>
</verify>
<if>
<bioperator>
&&
<bioperator>
||
<bioperator>
==
<variant>Var3</variant>
<literal>"A"</literal>
</bioperator>
<bioperator>
==
<variant>Var4</variant>
<literal>"B"</literal>
</bioperator>
</bioperator>
<bioperator>
>
<variant>Var5</variant>
<num>0</num>
</bioperator>
</bioperator>
<then>
<assign>
<variant>VarName</variant>
<literal>"Value1"</literal>
</assign>
<assign>
<variant>Var2</variant>
<num>10</num>
</assign>
</then>
<elseif>
<bioperator>
==
<variant>Var3</variant>
<literal>"C"</literal>
</bioperator>
<assign>
<variant>VarName</variant>
<literal>"SomeValue"</literal>
</assign>
</elseif>
</if>
</then>
<else>
<assign>
<variant>VarName</variant>
<literal>"Value2"</literal>
</assign>
</else>
</if>
Output Notes:
- Only the if block has a <then>...</then> tag.
- The <bioperator> tag is nested.
For simplicity, I'm starting from the working example that sehe helped me with before and extending it to add the capability to parse the structure above.
FULL CODE
// Turns on Spirit's rule tracing; the grammar constructor registers its rules
// for tracing through BOOST_SPIRIT_DEBUG_NODES.
// NOTE(review): presumably this has to stay ahead of the Spirit include to take effect — confirm.
#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>
#include <iomanip>
// Short aliases used throughout the rest of the file.
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
namespace Ast {
    using boost::recursive_wrapper;

    // Character-traits type parameterised on a tag. It adds nothing to
    // std::char_traits<char>; its only job is to make each String<Tag> a
    // distinct type so the different kinds of text can be told apart by
    // overload resolution and inside variants.
    template <typename>
    struct custom_string : std::char_traits<char> {};

    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag> >;

    // Tag types; declared only, never defined.
    struct TagId;
    struct TagLiteral;
    struct TagVariant;
    struct TagWord;
    struct TagObj;
    struct TagOp;
    struct TagDatatype;

    // One string type per kind of text the grammar distinguishes.
    using Identifier = String<TagId>;
    using Literal    = String<TagLiteral>;
    using Variant    = String<TagVariant>;
    using Word       = String<TagWord>;
    using Obj        = String<TagObj>;
    using BinOp      = String<TagOp>;
    using Datatype   = String<TagDatatype>;

    // Fields shared by every class-like node.
    struct Base {
        Identifier id;
        Literal    literal;
    };

    using Ids    = std::vector<Identifier>;
    using Enum   = Ids;
    using Number = double;
    using Value  = boost::variant<Literal, Number, Identifier, Variant>;

    // ---- Class declarations ------------------------------------------------

    struct Simple : Base {
        boost::optional<Enum>     enumeration;
        boost::optional<Datatype> datatype;
        boost::optional<Value>    default_;
    };

    struct Complex;
    struct Container;

    // Complex and Container contain Class themselves, hence the wrappers.
    using Class = boost::variant<
        Simple,
        recursive_wrapper<Complex>,
        recursive_wrapper<Container>
    >;
    using Classes = std::vector<Class>;

    struct Container : Base {
        Class element;
    };

    struct Complex : Base {
        Ids     bases;
        Classes members;
    };

    // ---- Expression block --------------------------------------------------

    struct Verify {
        Word   word;
        Number num;
        Obj    obj;
    };

    struct Assign {
        Variant var;
        Value   value;
    };

    struct Bioperator;
    struct Conditional;

    // Bioperator and Conditional are still incomplete at this point, so they
    // are held through recursive_wrapper.
    using Expression = boost::variant<
        Value,
        Verify,
        Assign,
        recursive_wrapper<Bioperator>,
        recursive_wrapper<Conditional>
    >;

    // A single comparison: variable, operator, value.
    struct Bioperator {
        Variant var;
        BinOp   op;
        Value   value;
    };

    // An if-statement; the false branch may be absent.
    struct Conditional {
        Expression                  condition;
        Expression                  true_block;
        boost::optional<Expression> false_block;
    };

    using Code = boost::variant<Conditional, Verify, Assign>;
    using Task = std::vector<boost::variant<Class, Code>>;
} // namespace Ast
// Fusion adaptations: expose the AST structs as sequences so that the Qi
// rules can fill their members in the order listed here.

// Classes
BOOST_FUSION_ADAPT_STRUCT(Ast::Simple,
    id, literal, enumeration, datatype, default_)
BOOST_FUSION_ADAPT_STRUCT(Ast::Complex,
    id, literal, bases, members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Container,
    id, literal, element)

// Expressions
BOOST_FUSION_ADAPT_STRUCT(Ast::Verify,
    word, num, obj)
BOOST_FUSION_ADAPT_STRUCT(Ast::Assign,
    var, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::Bioperator,
    var, op, value)
BOOST_FUSION_ADAPT_STRUCT(Ast::Conditional,
    condition, true_block, false_block)
namespace Parser {

    /**
     * Spirit Qi grammar that turns a task script into an Ast::Task.
     *
     * A task is a sequence of class declarations ("Class ... ;") and code
     * statements (if / assign / verify). The public start rule installs the
     * whitespace skipper itself, so the grammar can be used with plain
     * qi::parse. Rules joined with the expectation operator `>` throw
     * qi::expectation_failure<It> when the right-hand side does not match.
     *
     * @tparam It forward iterator type over the input characters.
     */
    template <typename It> struct Task : qi::grammar<It, Ast::Task()> {
        Task() : Task::base_type(start) {
            using namespace qi;

            // Entry point: skip whitespace for everything below.
            start = skip(space)[task_];

            // lexemes (declared without a skipper, so no whitespace is
            // skipped inside them):
            id_      = raw[alpha >> *(alnum | '_' | ':')];
            variant_ = id_;
            word_    = variant_;
            obj_     = word_;
            literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';

            // A literal that may be omitted; yields the string " " when absent.
            auto optlit = copy(literal_ | attr(std::string(" ")));

            task_     = *task_item > eoi;
            task_item = class_ | code_;

            // Class declarations
            subclass_     = simple_class_ | complex_ | container_;
            class_        = lit("Class") > subclass_ > ';';
            simple_class_ = lit("Simple") >> id_ >> optlit >> -enum_ >> -datatype_ >> -default_;
            inherit_      = lit("Inherit") >> id_;
            complex_      = lit("Complex") >> id_ >> optlit >> '(' >> *inherit_ >> *subclass_ >> ')';
            container_    = lit("Container") >> id_ >> optlit >> '(' >> subclass_ > ')';
            enum_         = lit("enumeration") >> '(' >> -(id_ % ',') > ')';
            datatype_     = lit("datatype") >> id_;
            value_        = literal_ | number_ | id_;
            number_       = double_;
            default_      = lit("Default") >> value_;

            // Expression
            code_       = conditional_ | assign_ | verify_; // more to come
            expr_       = simple_expr | assign_ | verify_;// | *(boolop_ >> bioperator_);
            simple_expr = value_ | bioperator_ | verify_ | assign_ | conditional_;
            bioperator_ = '(' >> variant_ >> binop_ >> value_ >> ')';
            assign_     = no_case["assign"] >> variant_ >> value_ > ';';
            verify_     = no_case["verify"] >> word_ >> number_ >> obj_ > ';';

            // Only "if ( expr ) expr endif ;" is accepted so far; the
            // else / elseif branches below are still disabled.
            conditional_
                = no_case["if"] >> '(' >> expr_ >> ')' >> expr_
                //>> -((lit("else") | lit("elseif")) >> expr_) // else & elseif
                >> no_case["endif"] > ';'
                ;
            //elsepart_ = no_case[lit("else")] >> expr_;
            //elseifpart_ = no_case["elseif"] >> conditional_;

            // Alternatives are tried left to right and the first one that
            // matches wins, so the two-character operators must come before
            // the one-character operators they start with; otherwise ">="
            // and "<=" would be consumed as ">" / "<" and never recognised.
            binop_  = string("==") | string("!=")
                    | string(">=") | string("<=")
                    | string(">")  | string("<");
            boolop_ = string("||") | string("&&");

            BOOST_SPIRIT_DEBUG_NODES(
                (task_)(task_item)(class_)(subclass_)(simple_class_)(complex_)(container_)(enum_)(datatype_)(default_)(inherit_)
                (id_)(literal_)(variant_)(word_)(value_)(number_)(obj_)
                (code_)(expr_)(simple_expr)(bioperator_)(verify_)(assign_)(conditional_)(binop_)(boolop_)
            )
        }

    private:
        // Skipper-less entry rule; see the constructor.
        qi::rule<It, Ast::Task()> start;

        using Skipper = qi::space_type;
        qi::rule<It, Ast::Task(), Skipper> task_, task_item;
        qi::rule<It, Ast::Class(), Skipper> class_, subclass_;
        qi::rule<It, Ast::Simple(), Skipper> simple_class_;
        qi::rule<It, Ast::Complex(), Skipper> complex_;
        qi::rule<It, Ast::Container(), Skipper> container_;
        qi::rule<It, Ast::Enum(), Skipper> enum_;
        qi::rule<It, Ast::Datatype(), Skipper> datatype_;
        qi::rule<It, Ast::Value(), Skipper> default_;
        qi::rule<It, Ast::Identifier(), Skipper> inherit_;
        qi::rule<It, Ast::Verify(), Skipper> verify_;
        qi::rule<It, Ast::Assign(), Skipper> assign_;
        qi::rule<It, Ast::Code(), Skipper> code_;
        qi::rule<It, Ast::Expression(), Skipper> expr_, simple_expr;
        // elsepart_ / elseifpart_ belong to the disabled else / elseif rules
        // in the constructor and are currently never assigned.
        qi::rule<It, Ast::Conditional(), Skipper> conditional_, elsepart_, elseifpart_;
        qi::rule<It, Ast::Bioperator(), Skipper> bioperator_;

        // lexemes:
        qi::rule<It, Ast::Identifier()> id_;
        qi::rule<It, Ast::Literal()> literal_;
        qi::rule<It, Ast::Variant()> variant_;
        qi::rule<It, Ast::Word()> word_;
        qi::rule<It, Ast::Obj()> obj_;
        qi::rule<It, Ast::Value()> value_;
        qi::rule<It, Ast::Number()> number_;
        qi::rule<It, Ast::BinOp()> binop_, boolop_;
    };
}
#include <pugixml.hpp>
namespace Generate {
    using namespace Ast;

    /**
     * Writes AST nodes into a pugixml tree.
     *
     * Calling an XML object with a parent node and any AST value appends the
     * XML for that value underneath the parent. The object is also used as
     * the visitor when a boost::variant is walked, which is why the public
     * interface is a single generic call operator.
     */
    struct XML {
        using Node = pugi::xml_node;

        // callable for variant visiting:
        template <typename T> void operator()(Node parent, T const& node) const { apply(parent, node); }

    private:
        // Any variant: dispatch on the alternative it currently holds.
        template <typename... Ts>
        void apply(Node parent, boost::variant<Ts...> const& v) const {
            using std::placeholders::_1;
            boost::apply_visitor(std::bind(*this, parent, _1), v);
        }

        // ---- leaf values: one element whose text is the value --------------
        void apply(Node parent, Number const& num) const {
            named_child(parent, "num").text().set(num);
        }
        void apply(Node parent, Identifier const& id) const {
            named_child(parent, "identifier").text().set(id.c_str());
        }
        void apply(Node parent, Obj const& o) const {
            named_child(parent, "obj").text().set(o.c_str());
        }
        void apply(Node parent, Word const& w) const {
            named_child(parent, "word").text().set(w.c_str());
        }
        void apply(Node parent, Variant const& v) const {
            named_child(parent, "variant").text().set(v.c_str());
        }
        void apply(Node parent, Literal const& literal) const {
            named_child(parent, "literal").text().set(literal.c_str());
        }
        void apply(Node parent, Datatype const& datatype) const {
            named_child(parent, "datatype").text().set(datatype.c_str());
        }

        // An empty optional produces no output at all.
        template <typename T> void apply(Node parent, boost::optional<T> const& opt) const {
            if (opt)
                apply(parent, *opt);
        }

        // ---- class declarations --------------------------------------------
        void apply(Node parent, Simple const& s) const {
            auto simple = named_child(parent, "simple");
            apply(simple, s.id);
            apply(simple, s.literal);
            apply(simple, s.enumeration);
            apply(simple, s.datatype);
            // The default value gets its own wrapper element, so it cannot
            // go through the generic optional overload.
            if (s.default_) {
                apply(named_child(simple, "default"), *s.default_);
            }
        }
        void apply(Node parent, Enum const& e) const {
            auto enum_ = named_child(parent, "enumeration");
            for (auto& v : e)
                named_child(enum_, "word").text().set(v.c_str());
        }
        void apply(Node parent, Complex const& c) const {
            auto complex_ = named_child(parent, "complex");
            apply(complex_, c.id);
            for (auto& base : c.bases)
                apply(named_child(complex_, "inherit"), base);
            apply(complex_, c.literal);
            for (auto& m : c.members)
                apply(complex_, m);
        }
        void apply(Node parent, Container const& c) const {
            auto cont = named_child(parent, "container");
            apply(cont, c.id);
            apply(cont, c.literal);
            apply(cont, c.element);
        }

        // ---- statements and expressions ------------------------------------
        void apply(Node parent, Assign const& a) const {
            auto asn_ = named_child(parent, "assign");
            apply(asn_, a.var);
            apply(asn_, a.value);
        }
        void apply(Node parent, Verify const& v) const {
            auto v_ = named_child(parent, "verify");
            apply(v_, v.word);
            apply(v_, v.num);
            apply(v_, v.obj);
        }
        // <bioperator>OP <variant/> <value/></bioperator>
        // The operator text is set before the operands are appended so that
        // it is the first child of the element.
        void apply(Node parent, Bioperator const& bo) const {
            auto botag = named_child(parent, "bioperator");
            botag.text().set(bo.op.c_str());
            apply(botag, bo.var);
            apply(botag, bo.value);
        }
        // <if> condition <then>...</then> [<else>...</else>] </if>
        // The <else> element is only written when a false branch exists.
        void apply(Node parent, Conditional const& c) const {
            auto if_ = named_child(parent, "if");
            apply(if_, c.condition);
            apply(named_child(if_, "then"), c.true_block);
            if (c.false_block) {
                apply(named_child(if_, "else"), *c.false_block);
            }
        }

        // Root: code items go directly under <task>, class declarations are
        // wrapped in an extra <class> element.
        void apply(Node parent, Task const& t) const {
            auto task = named_child(parent, "task");
            for (auto& item : t) {
                if (item.type() == typeid(Code)) {
                    apply(task, item);
                }
                else if (item.type() == typeid(Class)) {
                    apply(task.append_child("class"), item);
                }
            }
        }

    private:
        // Appends a new child element called `name` to `parent` and returns it.
        Node named_child(Node parent, std::string const& name) const {
            auto child = parent.append_child();
            child.set_name(name.c_str());
            return child;
        }
    };
} // namespace Generate
int main() {
using It = std::string::const_iterator;
static const Parser::Task<It> p;
static const Generate::XML to_xml;
for (std::string const input : {
R"(
If (Var1 == "Test" && Var2 <= 10 && Var3 == "Done")
Verify Word 32 Objective;
If ((Var3 == "A" || Var4 == "B") && Var5 > 0)
Assign VarName "Value1";
Assign Var2 10;
Elseif (Var3 == "C")
Assign VarName "SomeValue"
End If;
Else
Assign VarName "Value2"
EndIf;
)"
}) {
try {
Ast::Task t;
if (qi::parse(begin(input), end(input), p, t)) {
pugi::xml_document doc;
to_xml(doc.root(), t);
doc.print(std::cout, " ", pugi::format_default);
std::cout << std::endl;
}
else {
std::cout << " -> INVALID" << std::endl;
}
}
catch (qi::expectation_failure<It> const& ef) {
auto f = begin(input);
auto p = ef.first - input.begin();
auto bol = input.find_last_of("\r\n", p) + 1;
auto line = std::count(f, f + bol, '\n') + 1;
auto eol = input.find_first_of("\r\n", p);
std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
<< input.substr(bol, eol - bol) << "\n"
<< std::setw(p - bol) << ""
<< "^--- here" << std::endl;
}
}
}
I can't seem to compose the grammar to handle the recursive (nested) if-else. The above is what I have. The grammar does not seem correct as it won't parse the example text.
Any help would be greatly appreciated.
Thanks