In response to the "SSCCE" code added to the question:
The AST
The only way to treat nested defines correctly (including the case where conditional blocks contain #define
/#undef
directives!) is to use an AST that represents a tree of the blocks¹:
namespace Common {
typedef std::string pp_sym;
}
namespace Ast {
using Common::pp_sym;
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> Pairs;
struct ConditionalBlock;
namespace tag {
struct define;
struct undefine;
}
template <typename Tag> struct Directive {
pp_sym name;
};
typedef Directive<tag::define> Define;
typedef Directive<tag::undefine> Undef;
typedef boost::make_recursive_variant<
Pairs,
boost::recursive_wrapper<ConditionalBlock>,
Define,
Undef
>::type Block;
typedef std::vector<Block> Blocks;
struct ConditionalBlock {
pp_sym required;
Blocks if_, else_;
};
}
To facilitate parsing these without ever using a semantic action:
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)
Done.
Parsing
Because of the above work, we can now define the parser exactly as we would have liked!
Notes:
- now using a skipper to avoid having hardcoded amounts of whitespace required or whitespace intolerance
- now using
seek[eol]
to ignore until the end of a line
- now using
distinct
to parse identifiers (see boost::spirit::qi keywords and identifiers)
- Now made the appearance of
#else
optional (see -else
)
- Removes all semantic actions
Enables debug information without any more work
start = skip(blank) [ blocks ];
blocks = *block;
block = define | undef | conditional_block | +pair;
pair = !char_("#") >> +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];
define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
undef = '#' >> distinct(alnum | '_') [ "undef" ] >> pp_symbol >> seek[*eol];
else_ = '#' >> distinct(alnum | '_') [ "else" ] >> seek[*eol];
endif = '#' >> distinct(alnum | '_') [ "endif" ] >> seek[*eol];
conditional_block =
('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
>> *(!(else_|endif) >> block)
>> -else_
>> *(!endif >> block)
>> endif
;
BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
I'd say that is pretty legible, and it results in the ast containing all the information you could want to use later
Processing Logic
Now that we've separated the processing from the parsing, processing is a single visitation of the tree. We use a single function object Logic::Preprocessor
that doubles as the variant visitor:
Logic::Preprocess pp({{"EXTERNAL"}} , " ");
pp(ast);
In this sample, we start with the preprocessor symbol EXTERNAL
defined (as if it was defined "externally", like on a command line).
The implementation of the visitor is pretty straightforward, but let me show the action bits, namely where conditions are taken and branches ignored. To make things very complete I even traverse the branches that are not satisfied, just to show that the full AST is there, but with en isolated
instance of the function object so there is no effect:
void operator()(Ast::ConditionalBlock const& cb) const {
bool const satisfied = ctx.defined.count(cb.required);
auto old_indent = indent;
indent += "\t";
std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";
Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch
(satisfied? *this : isolated)(cb.if_);
std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
(satisfied? isolated : *this)(cb.else_);
std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
indent.resize(indent.size()-1);
}
void operator()(Ast::Define const& directive) const {
ctx.defined.insert(directive.name);
std::cout << indent << "#define\t" << directive.name;
report();
}
void operator()(Ast::Undef const& directive) const {
ctx.defined.erase(directive.name);
std::cout << indent << "#undef\t" << directive.name;
report();
}
Demo
Observe how this document, which even nests conditional blocks and defines symbols from within conditional branches (so, conditionally), is correctly interpreted:
#define FOO
led_zeppelin=9
the_shins=9
dead_mau5=6
portishead=10
#ifdef FOO
foo_fighters=7
#define ZOO
#else
the_who=6
#define QUX
#endif
#ifdef EXTERNAL
#ifdef ZOO
zoowasdefined=yes
#else
zoowasdefined=no
#endif
#ifdef QUX
quxwasdefined=yes
#else
quxwasdefined=no
#endif
#endif
kanye_west=4
#undef FOO
#define BAR
Our demo program prints: Live On Coliru
Preprocess results:
#define FOO // effective: EXTERNAL FOO
led_zeppelin=9
the_shins=9
dead_mau5=6
portishead=10
#ifdef FOO // true
foo_fighters=7
#define ZOO // effective: EXTERNAL FOO ZOO
#else // ifdef FOO
// the_who=6
// #define QUX // effective: EXTERNAL FOO QUX
#endif // ifdef FOO
#ifdef EXTERNAL // true
#ifdef ZOO // true
zoowasdefined=yes
#else // ifdef ZOO
// zoowasdefined=no
#endif // ifdef ZOO
#ifdef QUX // false
// quxwasdefined=yes
#else // ifdef QUX
quxwasdefined=no
#endif // ifdef QUX
#else // ifdef EXTERNAL
#endif // ifdef EXTERNAL
kanye_west=4
#undef FOO // effective: EXTERNAL ZOO
#define BAR // effective: BAR EXTERNAL ZOO
Defines still in effect: BAR EXTERNAL ZOO
Full Listing
Live On Coliru
#define BOOST_SPIRIT_USE_PHOENIX_V3
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/variant.hpp>
#include <cassert>
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
namespace Common {
typedef std::string pp_sym;
}
namespace Ast {
using Common::pp_sym;
typedef std::string Str;
typedef std::pair<Str, Str> Pair;
typedef std::vector<Pair> Pairs;
struct ConditionalBlock;
namespace tag {
struct define;
struct undefine;
}
template <typename Tag> struct Directive {
pp_sym name;
};
typedef Directive<tag::define> Define;
typedef Directive<tag::undefine> Undef;
typedef boost::make_recursive_variant<
Pairs,
boost::recursive_wrapper<ConditionalBlock>,
Define,
Undef
>::type Block;
typedef std::vector<Block> Blocks;
struct ConditionalBlock {
pp_sym required;
Blocks if_, else_;
};
}
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::Directive)(Tag), name)
BOOST_FUSION_ADAPT_STRUCT(Ast::ConditionalBlock, required, if_, else_)
/***
* Grammar definitions
*/
template <typename Iterator>
struct simple_grammar : qi::grammar<Iterator, Ast::Blocks()> {
simple_grammar() : simple_grammar::base_type(start)
{
using namespace qi;
using qr::distinct;
using qr::seek;
start = skip(blank) [ blocks ];
blocks = *block;
block = define | undef | conditional_block | +pair;
pair = +~char_("=\r\n") >> '=' >> *(char_ - eol) >> *eol;
pp_symbol = qr::distinct(char_("A-Za-z_")) [ +char_("A-Za-z_") ];
define = '#' >> distinct(alnum | '_') [ "define" ] >> pp_symbol >> seek[*eol];
undef = '#' >> distinct(alnum | '_') [ "undef" ] >> pp_symbol >> seek[*eol];
else_ = '#' >> distinct(alnum | '_') [ "else" ] >> seek[*eol];
endif = '#' >> distinct(alnum | '_') [ "endif" ] >> seek[*eol];
conditional_block =
('#' >> distinct(alnum | '_') [ "ifdef" ] >> pp_symbol >> seek[*eol])
>> *(!(else_|endif) >> block)
>> -else_
>> *(!endif >> block)
>> endif
;
BOOST_SPIRIT_DEBUG_NODES((start)(blocks)(block)(pair)(pp_symbol)(define)(undef)(else_)(endif)(conditional_block))
}
private:
using Skipper = qi::blank_type;
qi::rule<Iterator, Ast::Blocks()> start;
qi::rule<Iterator, Ast::Blocks(), Skipper> blocks;
qi::rule<Iterator, Ast::Block(), Skipper> block;
// directive
qi::rule<Iterator, Ast::ConditionalBlock(), Skipper> conditional_block;
qi::rule<Iterator, Ast::Define(), Skipper> define;
qi::rule<Iterator, Ast::Undef(), Skipper> undef;
// empty directives
qi::rule<Iterator, Skipper> else_, endif;
// lexeme
qi::rule<Iterator, Ast::Pair()> pair;
qi::rule<Iterator, Ast::pp_sym()> pp_symbol;
};
namespace Logic {
using Common::pp_sym;
typedef std::set<pp_sym> pp_syms;
struct context {
pp_syms defined;
};
struct Preprocess : boost::static_visitor<void> {
context ctx;
std::string indent;
Preprocess(context ctx = {}, std::string indent = "")
: ctx(std::move(ctx)), indent(std::move(indent))
{ }
void operator()(Ast::Blocks const& blocks) {
for (auto& b : blocks)
boost::apply_visitor(*this, b);
}
void operator()(Ast::Block const& block) {
boost::apply_visitor(*this, block);
}
void operator()(Ast::Pairs const& pairs) {
for (auto& p : pairs)
std::cout << indent << p.first << "=" << p.second << "\n";
}
void operator()(Ast::ConditionalBlock const& cb) {
bool const satisfied = ctx.defined.count(cb.required);
auto old_indent = indent;
indent += "\t";
std::cout << old_indent << "#ifdef " << cb.required << " // " << std::boolalpha << satisfied << "\n";
Preprocess isolated{ctx, indent+"// "}; // prevent changes to ctx to affect us for the non-matching branch
(satisfied? *this : isolated)(cb.if_);
std::cout << old_indent << "#else " << " // ifdef " << cb.required << "\n";
(satisfied? isolated : *this)(cb.else_);
std::cout << old_indent << "#endif " << " // ifdef " << cb.required << "\n";
indent.resize(indent.size()-1);
}
void operator()(Ast::Define const& directive) {
ctx.defined.insert(directive.name);
std::cout << indent << "#define\t" << directive.name;
report();
}
void operator()(Ast::Undef const& directive) {
ctx.defined.erase(directive.name);
std::cout << indent << "#undef\t" << directive.name;
report();
}
private:
void report() const {
std::cout << "\t// effective: ";
for (auto& sym : ctx.defined) std::cout << sym << " ";
std::cout << "\n";
}
};
}
int main() {
typedef boost::spirit::istream_iterator It;
typedef simple_grammar<It> my_grammar;
my_grammar gram; // Our grammar
Ast::Blocks ast; // Our tree
It it(std::cin >> std::noskipws), end;
bool b = qi::parse(it, end, gram, ast);
if (it != end)
std::cout << "Remaining input: '" << std::string(it, end) << "'\n";
assert(b);
std::cout << "Preprocess results:\n\n";
Logic::Preprocess pp({{"EXTERNAL"}} , " ");
pp(ast);
std::cout << "\n\nDefines still in effect: ";
for (auto& sym : pp.ctx.defined) std::cout << sym << " ";
}
BONUS: Debug Info
Enabling debug information yields the following detailed trace information in addition to the above output:
<start>
<try>#define FOO\nled_zepp</try>
<blocks>
<try>#define FOO\nled_zepp</try>
<block>
<try>#define FOO\nled_zepp</try>
<define>
<try>#define FOO\nled_zepp</try>
<pp_symbol>
<try>FOO\nled_zeppelin=9\nt</try>
<success>\nled_zeppelin=9\nthe_</success>
<attributes>[[F, O, O]]</attributes>
</pp_symbol>
<success>led_zeppelin=9\nthe_s</success>
<attributes>[[[F, O, O]]]</attributes>
</define>
<success>led_zeppelin=9\nthe_s</success>
<attributes>[[[F, O, O]]]</attributes>
</block>
<block>
<try>led_zeppelin=9\nthe_s</try>
<define>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</define>
<undef>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</undef>
<conditional_block>
<try>led_zeppelin=9\nthe_s</try>
<fail/>
</conditional_block>
<pair>
<try>led_zeppelin=9\nthe_s</try>
<success>the_shins=9\ndead_mau</success>
<attributes>[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]]]</attributes>
</pair>
<pair>
<try>the_shins=9\ndead_mau</try>
<success>dead_mau5=6\nportishe</success>
<attributes>[[[t, h, e, _, s, h, i, n, s], [9]]]</attributes>
</pair>
<pair>
<try>dead_mau5=6\nportishe</try>
<success>portishead=10\n#ifdef</success>
<attributes>[[[d, e, a, d, _, m, a, u, 5], [6]]]</attributes>
</pair>
<pair>
<try>portishead=10\n#ifdef</try>
<success>#ifdef FOO\nfoo_fight</success>
<attributes>[[[p, o, r, t, i, s, h, e, a, d], [1, 0]]]</attributes>
</pair>
<pair>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</pair>
<success>#ifdef FOO\nfoo_fight</success>
<attributes>[[[[l, e, d, _, z, e, p, p, e, l, i, n], [9]], [[t, h, e, _, s, h, i, n, s], [9]], [[d, e, a, d, _, m, a, u, 5], [6]], [[p, o, r, t, i, s, h, e, a, d], [1, 0]]]]</attributes>
</block>
<block>
<try>#ifdef FOO\nfoo_fight</try>
<define>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</define>
<undef>
<try>#ifdef FOO\nfoo_fight</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef FOO\nfoo_fight</try>
<pp_symbol>
<try>FOO\nfoo_fighters=7\n#</try>
<success>\nfoo_fighters=7\n#def</success>
<attributes>[[F, O, O]]</attributes>
</pp_symbol>
<else_>
<try>foo_fighters=7\n#defi</try>
<fail/>
</else_>
<endif>
<try>foo_fighters=7\n#defi</try>
<fail/>
</endif>
<block>
<try>foo_fighters=7\n#defi</try>
<define>
<try>foo_fighters=7\n#defi</try>
<fail/>
</define>
<undef>
<try>foo_fighters=7\n#defi</try>
<fail/>
</undef>
<conditional_block>
<try>foo_fighters=7\n#defi</try>
<fail/>
</conditional_block>
<pair>
<try>foo_fighters=7\n#defi</try>
<success>#define ZOO\n#else\nth</success>
<attributes>[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]</attributes>
</pair>
<pair>
<try>#define ZOO\n#else\nth</try>
<fail/>
</pair>
<success>#define ZOO\n#else\nth</success>
<attributes>[[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]]]</attributes>
</block>
<else_>
<try>#define ZOO\n#else\nth</try>
<fail/>
</else_>
<endif>
<try>#define ZOO\n#else\nth</try>
<fail/>
</endif>
<block>
<try>#define ZOO\n#else\nth</try>
<define>
<try>#define ZOO\n#else\nth</try>
<pp_symbol>
<try>ZOO\n#else\nthe_who=6\n</try>
<success>\n#else\nthe_who=6\n#de</success>
<attributes>[[Z, O, O]]</attributes>
</pp_symbol>
<success>#else\nthe_who=6\n#def</success>
<attributes>[[[Z, O, O]]]</attributes>
</define>
<success>#else\nthe_who=6\n#def</success>
<attributes>[[[Z, O, O]]]</attributes>
</block>
<else_>
<try>#else\nthe_who=6\n#def</try>
<success>the_who=6\n#define QU</success>
<attributes>[]</attributes>
</else_>
<else_>
<try>#else\nthe_who=6\n#def</try>
<success>the_who=6\n#define QU</success>
<attributes>[]</attributes>
</else_>
<endif>
<try>the_who=6\n#define QU</try>
<fail/>
</endif>
<block>
<try>the_who=6\n#define QU</try>
<define>
<try>the_who=6\n#define QU</try>
<fail/>
</define>
<undef>
<try>the_who=6\n#define QU</try>
<fail/>
</undef>
<conditional_block>
<try>the_who=6\n#define QU</try>
<fail/>
</conditional_block>
<pair>
<try>the_who=6\n#define QU</try>
<success>#define QUX\n#endif\n\n</success>
<attributes>[[[t, h, e, _, w, h, o], [6]]]</attributes>
</pair>
<pair>
<try>#define QUX\n#endif\n\n</try>
<fail/>
</pair>
<success>#define QUX\n#endif\n\n</success>
<attributes>[[[[t, h, e, _, w, h, o], [6]]]]</attributes>
</block>
<endif>
<try>#define QUX\n#endif\n\n</try>
<fail/>
</endif>
<block>
<try>#define QUX\n#endif\n\n</try>
<define>
<try>#define QUX\n#endif\n\n</try>
<pp_symbol>
<try>QUX\n#endif\n\n#ifdef E</try>
<success>\n#endif\n\n#ifdef EXTE</success>
<attributes>[[Q, U, X]]</attributes>
</pp_symbol>
<success>#endif\n\n#ifdef EXTER</success>
<attributes>[[[Q, U, X]]]</attributes>
</define>
<success>#endif\n\n#ifdef EXTER</success>
<attributes>[[[Q, U, X]]]</attributes>
</block>
<endif>
<try>#endif\n\n#ifdef EXTER</try>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[]</attributes>
</endif>
<endif>
<try>#endif\n\n#ifdef EXTER</try>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[]</attributes>
</endif>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
</conditional_block>
<success>#ifdef EXTERNAL\n\n#if</success>
<attributes>[[[F, O, O], [[[[f, o, o, _, f, i, g, h, t, e, r, s], [7]]], [[Z, O, O]]], [[[[t, h, e, _, w, h, o], [6]]], [[Q, U, X]]]]]</attributes>
</block>
<block>
<try>#ifdef EXTERNAL\n\n#if</try>
<define>
<try>#ifdef EXTERNAL\n\n#if</try>
<fail/>
</define>
<undef>
<try>#ifdef EXTERNAL\n\n#if</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef EXTERNAL\n\n#if</try>
<pp_symbol>
<try>EXTERNAL\n\n#ifdef ZOO</try>
<success>\n\n#ifdef ZOO\nzoowasd</success>
<attributes>[[E, X, T, E, R, N, A, L]]</attributes>
</pp_symbol>
<else_>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</else_>
<endif>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</endif>
<block>
<try>#ifdef ZOO\nzoowasdef</try>
<define>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</define>
<undef>
<try>#ifdef ZOO\nzoowasdef</try>
<fail/>
</undef>
<conditional_block>
<try>#ifdef ZOO\nzoowasdef</try>
<pp_symbol>
<try>ZOO\nzoowasdefined=ye</try>
<success>\nzoowasdefined=yes\n#</success>
<attributes>[[Z, O, O]]</attributes>
</pp_symbol>
<else_>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</else_>
<endif>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</endif>
<block>
<try>zoowasdefined=yes\n#e</try>
<define>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</define>
<undef>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</undef>
<conditional_block>
<try>zoowasdefined=yes\n#e</try>
<fail/>
</conditional_block>
<pair>
<try>zoowasdefined=yes\n#e</try>
<success>#else\nzoowasdefined=</success>
<attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]</attributes>
</pair>
<pair>
<try>#else\nzoowasdefined=</try>
<fail/>
</pair>
<success>#else\nzoowasdefined=</success>
<attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]]</attributes>
</block>
<else_>
<try>#else\nzoowasdefined=</try>
<success>zoowasdefined=no\n#en</success>
<attributes>[]</attributes>
</else_>
<else_>
<try>#else\nzoowasdefined=</try>
<success>zoowasdefined=no\n#en</success>
<attributes>[]</attributes>
</else_>
<endif>
<try>zoowasdefined=no\n#en</try>
<fail/>
</endif>
<block>
<try>zoowasdefined=no\n#en</try>
<define>
<try>zoowasdefined=no\n#en</try>
<fail/>
</define>
<undef>
<try>zoowasdefined=no\n#en</try>
<fail/>
</undef>
<conditional_block>
<try>zoowasdefined=no\n#en</try>
<fail/>
</conditional_block>
<pair>
<try>zoowasdefined=no\n#en</try>
<success>#endif\n\n#ifdef QUX\nq</success>
<attributes>[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]</attributes>
</pair>
<pair>
<try>#endif\n\n#ifdef QUX\nq</try>
<fail/>
</pair>
<success>#endif\n\n#ifdef QUX\nq</success>
<attributes>[[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]</attributes>
</block>
<endif>
<try>#endif\n\n#ifdef QUX\nq</try>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[]</attributes>
</endif>
<endif>
<try>#endif\n\n#ifdef QUX\nq</try>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[]</attributes>
</endif>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
</conditional_block>
<success>#ifdef QUX\nquxwasdef</success>
<attributes>[[[Z, O, O], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [y, e, s]]]], [[[[z, o, o, w, a, s, d, e, f, i, n, e, d], [n, o]]]]]]</attributes>
</block>
....
</start>
¹ or you should have a rather complicated tree to match at parse time. Whenever in doubt, separate parsing from processing. This ties in closely with Boost Spirit: "Semantic actions are evil"?