A few things.
You need to brush up on skippers and lexemes:
Specifically, qi::eol
is part of qi::space
(not qi::blank
). I'd specify the skipper simply as
skip = qi::ascii::space | line_comment;
line_comment = "//" >> *(qi::char_ - qi::eol) >> (qi::eol|qi::eoi);
Even more specifically, you'll /need/ to make sure identifiers are a lexeme. The simplest way is to drop the skipper from the rule's declaration. Otherwise "a b\nc"
is a perfectly valid spelling of the identifier "abc"
.
// lexemes
qi::rule<Iterator, std::string()> primitive_gate, ident;
Next up, your sample shows every statement terminated with ';'
. But your grammar says:
statements = statement % ';';
This will allow "S1"
, "S1;S2"
, ... but not "S1;"
. There are several ways to fix it. The simplest would appear to be
statements = +(statement >> ';'); // require exactly one `;` always
Alternatively, if "S1;;;;"
is also acceptable, you might be tempted to say
statements = +(statement >> +qi::lit(';')); // require at least one `;` always
Note though that this would not accept ";;;S1;;"
, nor ""
as you might have expected. A pattern I often employ is the optional element list:
statements = -statement % ';'; // simple and flexible
Which has a nice way of accepting ""
, ";"
, ";;"
, "S1"
, ";;S1;"
etc. Note it's not as efficient as something more verbose like
statements = *(*qi::lit(';') >> statement >> +qi::lit(';')); // each statement followed by at least one `;`; stray leading `;` allowed
I note you use qi::char_('(')
(and similar) that will expose the matched character in the synthesized attribute. It is highly unlikely this is what you mean. Use qi::lit('(')
instead; indeed, bare character/string literals in a parser expression are promoted to parsers automatically¹.
Consider using BOOST_SPIRIT_DEBUG to gain insight into what your grammar is doing
Encapsulate your skipper, since the caller should not be bothered about it, and you likely do not want users of your grammar to be able to change the skipper (that might break the entire grammar).
Consider using symbols instead of listing keywords, like:
primitive_gate = qi::lit("nand") | "nor" | "and" | "or" | "xor" |
"xnor" | "buf" | "not";
Pay attention to the ordering and keyword matching. If you parse an identifier, a keyword like nand
would match. If you have an identifier like xor21
however, the keyword xor
would match first. You may want/need to guard against this (see "How to parse reserved words correctly in boost spirit").
Note that the presence of a semantic action (like e.g. the found_smth
) inhibits automatic attribute propagation, unless you use operator%=
to assign the parser expression to the rule.
DEMO TIME
Applying the above...:
Live On Wandbox
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
#include <boost/fusion/adapted.hpp>
namespace qi = boost::spirit::qi;
/// Semantic-action hook: writes this function's own signature, followed by a
/// newline, to standard output each time it is invoked.
static void found_smth()
{
    const char* const signature = __PRETTY_FUNCTION__;
    std::cout << signature << "\n";
}
/// Skipper grammar for the Verilog parser: matches one unit of skippable
/// input, which is either an ASCII whitespace character or a `//` line comment.
template <typename Iterator>
struct verilog_skipper : qi::grammar<Iterator> {
    verilog_skipper() : verilog_skipper::base_type(skip) {
        using qi::char_;
        using qi::eoi;
        using qi::eol;

        // A comment runs from `//` up to the line break; `eoi` covers a
        // comment on the last line when the input has no trailing newline.
        line_comment = "//" >> *(char_ - eol) >> (eol | eoi);

        // Entry rule: whitespace first, otherwise a line comment.
        skip = qi::ascii::space | line_comment;
    }

  private:
    qi::rule<Iterator> skip;
    qi::rule<Iterator> line_comment;
};
/// Qi grammar that recognises one structural Verilog module:
/// `module NAME ( ports ) ;` followed by `;`-separated statements and a
/// closing `endmodule`.
///
/// The grammar exposes no attribute and needs no skipper from the caller:
/// the `start` rule installs the encapsulated `verilog_skipper` itself, so it
/// is used with plain `qi::parse`.
template <typename Iterator>
struct verilog_grammar : qi::grammar<Iterator> {
    //verilog_ast ckt_ast;
    using Skipper = verilog_skipper<Iterator>;

    verilog_grammar() : verilog_grammar::base_type(start) {
        namespace phx = boost::phoenix;
        using boost::spirit::repository::qi::distinct;

        // kw["word"] matches "word" only when it is not immediately followed
        // by another identifier character, so a keyword is not recognised as
        // the prefix of a longer identifier.
        auto kw = distinct(qi::char_("a-zA-Z_0-9"));

        // Entry point: switch on the internal skipper for everything below.
        start = qi::skip(qi::copy(skipper)) [module];

        module = (module_definition >> statements >> kw["endmodule"]);
        module_definition = (kw["module"] >> ident >> '(' >> ident_list >> ')' >> ';');

        // Optional-element list: accepts "", ";", "S1", "S1;", ";;S1;" etc.
        statements = -statement % ';';
        statement = input_wires | output_wires | internal_wires | primitive | instance;

        input_wires = kw["input"] >> ident_list;
        output_wires = kw["output"] >> ident_list;
        internal_wires = kw["wire"] >> ident_list;

        primitive = primitive_gate >> ident >> '(' >> ident_list >> ')';
        instance = ident >> ident >> '(' >> connection_pair_list >> ')';

        connection_pair_list = connection_pair % ',';
        // NOTE subtle use of `operator%=` in the presence of a semantic action
        connection_pair %= (qi::lit('.')[phx::bind(&found_smth)] >> ident
                            >> '(' >> ident >> ')');

        ident_list = ident % ',';

        // Lexemes: these rules are declared without a skipper, so no
        // whitespace or comment is skipped inside a token.
        ident = (qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"));
        primitive_gate = qi::raw[kw[primitive_gate_]];

        // Every traced rule is listed exactly once. The list used to name
        // `input_wires` twice and leave out `internal_wires`.
        BOOST_SPIRIT_DEBUG_NODES(
            (module)(module_definition)(statements)(statement)
            (primitive)(primitive_gate)(instance)
            (output_wires)(input_wires)(internal_wires)
            (connection_pair_list)(connection_pair)(ident_list)(ident)
        )
    }

  private:
    qi::rule<Iterator> start;
    qi::rule<Iterator, Skipper> module;
    qi::rule<Iterator, Skipper> module_definition;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> primitive;
    qi::rule<Iterator, std::string()> primitive_gate;
    qi::rule<Iterator, Skipper> instance;
    qi::rule<Iterator, Skipper> output_wires;
    qi::rule<Iterator, Skipper> input_wires;
    qi::rule<Iterator, Skipper> internal_wires;
    qi::rule<Iterator, std::vector<std::pair<std::string, std::string> >(), Skipper> connection_pair_list;
    qi::rule<Iterator, std::pair<std::string, std::string>(), Skipper> connection_pair;
    qi::rule<Iterator, std::vector<std::string>(), Skipper> ident_list;
    // lexemes
    qi::rule<Iterator, std::string()> ident;

    // Symbol table holding the names of the built-in gate primitives.
    struct primitive_gate_t : qi::symbols<char> {
        primitive_gate_t() { this->add("nand")("nor")("and")("or")("xor")("xnor")("buf")("not"); }
    } primitive_gate_;

    Skipper skipper;
};
#include <fstream>
int main() {
std::ifstream ifs("input.txt");
using It = boost::spirit::istream_iterator;
It f(ifs >> std::noskipws), l;
bool ok = qi::parse(f, l, verilog_grammar<It>{});
if (ok)
std::cout << "Parsed\n";
else
std::cout << "Parse failed\n";
if (f!=l)
std::cout << "Remaining unparsed '" << std::string(f,l) << "'\n";
}
Prints:
void found_smth()
void found_smth()
Parsed
Or with debug information enabled (BOOST_SPIRIT_DEBUG
):
<module>
<try>module mymod (A, B);</try>
<module_definition>
<try>module mymod (A, B);</try>
<ident>
<try>mymod (A, B);\n\ninput</try>
<success> (A, B);\n\ninput A, B</success>
<attributes>[[m, y, m, o, d]]</attributes>
</ident>
<ident_list>
<try>A, B);\n\ninput A, B;\n</try>
<ident>
<try>A, B);\n\ninput A, B;\n</try>
<success>, B);\n\ninput A, B;\n\n</success>
<attributes>[[A]]</attributes>
</ident>
<ident>
<try>B);\n\ninput A, B;\n\nXO</try>
<success>);\n\ninput A, B;\n\nXOR</success>
<attributes>[[B]]</attributes>
</ident>
<success>);\n\ninput A, B;\n\nXOR</success>
<attributes>[[[A], [B]]]</attributes>
</ident_list>
<success>\n\ninput A, B;\n\nXOR21</success>
<attributes>[]</attributes>
</module_definition>
<statements>
<try>\n\ninput A, B;\n\nXOR21</try>
<statement>
<try>\n\ninput A, B;\n\nXOR21</try>
<input_wires>
<try>\n\ninput A, B;\n\nXOR21</try>
<input_wires>
<try>\n\ninput A, B;\n\nXOR21</try>
<ident_list>
<try> A, B;\n\nXOR21 gatexo</try>
<ident>
<try>A, B;\n\nXOR21 gatexor</try>
<success>, B;\n\nXOR21 gatexor5</success>
<attributes>[[A]]</attributes>
</ident>
<ident>
<try>B;\n\nXOR21 gatexor5 (</try>
<success>;\n\nXOR21 gatexor5 (.</success>
<attributes>[[B]]</attributes>
</ident>
<success>;\n\nXOR21 gatexor5 (.</success>
<attributes>[[[A], [B]]]</attributes>
</ident_list>
<success>;\n\nXOR21 gatexor5 (.</success>
<attributes>[]</attributes>
</input_wires>
<success>;\n\nXOR21 gatexor5 (.</success>
<attributes>[]</attributes>
</input_wires>
<success>;\n\nXOR21 gatexor5 (.</success>
<attributes>[]</attributes>
</statement>
<statement>
<try>\n\nXOR21 gatexor5 (.A</try>
<input_wires>
<try>\n\nXOR21 gatexor5 (.A</try>
<input_wires>
<try>\n\nXOR21 gatexor5 (.A</try>
<fail/>
</input_wires>
<fail/>
</input_wires>
<output_wires>
<try>\n\nXOR21 gatexor5 (.A</try>
<fail/>
</output_wires>
<primitive>
<try>\n\nXOR21 gatexor5 (.A</try>
<primitive_gate>
<try>XOR21 gatexor5 (.A(B</try>
<fail/>
</primitive_gate>
<fail/>
</primitive>
<instance>
<try>\n\nXOR21 gatexor5 (.A</try>
<ident>
<try>XOR21 gatexor5 (.A(B</try>
<success> gatexor5 (.A(B) , .</success>
<attributes>[[X, O, R, 2, 1]]</attributes>
</ident>
<ident>
<try>gatexor5 (.A(B) , .C</try>
<success> (.A(B) , .C(D));\nen</success>
<attributes>[[g, a, t, e, x, o, r, 5]]</attributes>
</ident>
<connection_pair_list>
<try>.A(B) , .C(D));\nendm</try>
<connection_pair>
<try>.A(B) , .C(D));\nendm</try>
<ident>
<try>A(B) , .C(D));\nendmo</try>
<success>(B) , .C(D));\nendmod</success>
<attributes>[[A]]</attributes>
</ident>
<ident>
<try>B) , .C(D));\nendmodu</try>
<success>) , .C(D));\nendmodul</success>
<attributes>[[B]]</attributes>
</ident>
<success> , .C(D));\nendmodule</success>
<attributes>[[[A], [B]]]</attributes>
</connection_pair>
<connection_pair>
<try> .C(D));\nendmodule\n</try>
<ident>
<try>C(D));\nendmodule\n</try>
<success>(D));\nendmodule\n</success>
<attributes>[[C]]</attributes>
</ident>
<ident>
<try>D));\nendmodule\n</try>
<success>));\nendmodule\n</success>
<attributes>[[D]]</attributes>
</ident>
<success>);\nendmodule\n</success>
<attributes>[[[C], [D]]]</attributes>
</connection_pair>
<success>);\nendmodule\n</success>
<attributes>[[[[A], [B]], [[C], [D]]]]</attributes>
</connection_pair_list>
<success>;\nendmodule\n</success>
<attributes>[]</attributes>
</instance>
<success>;\nendmodule\n</success>
<attributes>[]</attributes>
</statement>
<statement>
<try>\nendmodule\n</try>
<input_wires>
<try>\nendmodule\n</try>
<input_wires>
<try>\nendmodule\n</try>
<fail/>
</input_wires>
<fail/>
</input_wires>
<output_wires>
<try>\nendmodule\n</try>
<fail/>
</output_wires>
<primitive>
<try>\nendmodule\n</try>
<primitive_gate>
<try>endmodule\n</try>
<fail/>
</primitive_gate>
<fail/>
</primitive>
<instance>
<try>\nendmodule\n</try>
<ident>
<try>endmodule\n</try>
<success>\n</success>
<attributes>[[e, n, d, m, o, d, u, l, e]]</attributes>
</ident>
<ident>
<try></try>
<fail/>
</ident>
<fail/>
</instance>
<fail/>
</statement>
<success>\nendmodule\n</success>
<attributes>[]</attributes>
</statements>
<success>\n</success>
<attributes>[]</attributes>
</module>
¹ as long as one operand involved in the expression is from the Qi proto-expression domain