4

I am a beginner in using boost spirit

Say that I have the following code that parse a simple arithmetic expression with variables:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/variant/apply_visitor.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_function.hpp>
#include <boost/foreach.hpp>

#include <iostream>
#include <string>

namespace client {
    namespace ast
    {
        struct nil {};
        struct signed_;
        struct program;

        typedef boost::variant<
            nil
            , double
            , boost::recursive_wrapper<signed_>
            , boost::recursive_wrapper<program>
        >
        operand;

        struct signed_
        {
            char sign;
            operand operand_;
        };

        struct operation
        {
            char operator_;
            operand operand_;
        };

        struct program
        {
            operand first;
            std::list<operation> rest;
        };
    }
}

BOOST_FUSION_ADAPT_STRUCT(
    client::ast::signed_,
    (char, sign)
    (client::ast::operand, operand_)
    )

    BOOST_FUSION_ADAPT_STRUCT(
    client::ast::operation,
    (char, operator_)
    (client::ast::operand, operand_)
    )

    BOOST_FUSION_ADAPT_STRUCT(
    client::ast::program,
    (client::ast::operand, first)
    (std::list<client::ast::operation>, rest)
    )

namespace client {
    namespace ast
    {
        struct eval
        {
            typedef double result_type;

            double operator()(nil) const { BOOST_ASSERT(0); return 0; }
            double operator()(double n) const { return n; }

            double operator()(operation const& x, double lhs) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.operator_)
                {
                case '+': return lhs + rhs;
                case '-': return lhs - rhs;
                case '*': return lhs * rhs;
                case '/': return lhs / rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(signed_ const& x) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.sign)
                {
                case '-': return -rhs;
                case '+': return +rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(program const& x) const
            {
                double state = boost::apply_visitor(*this, x.first);
                BOOST_FOREACH(operation const& oper, x.rest)
                {
                    state = (*this)(oper, state);
                }
                return state;
            }
        };
    }
}

namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    using boost::phoenix::function;

    template <typename Iterator>
    struct calculator : qi::grammar<Iterator, ast::program(), ascii::space_type>
    {
        calculator() : calculator::base_type(expression)
        {
            qi::char_type char_;
            qi::double_type doubleParser_;

            symboleTable.add("var1", 2);
            symboleTable.add("var2", 15);
            symboleTable.add("var4", 5);
            symboleTable.add("var", 5);
            symboleTable.add("x", 5);

            expression =
                term
                >> *((char_('+') > term)
                | (char_('-') > term)
                )
                ;

            term =
                factor
                >> *((char_('*') > factor)
                | (char_('/') > factor)
                )
                ;

            factor =
                doubleParser_
                | symbolTable
                | '(' > expression > ')'
                | (char_('-') > factor)
                | (char_('+') > factor)
                ;
        }
        qi::symbols<char, double> symbolTable;
        qi::rule<Iterator, ast::program(), ascii::space_type> expression;
        qi::rule<Iterator, ast::program(), ascii::space_type> term;
        qi::rule<Iterator, ast::operand(), ascii::space_type> factor;
    };
}

/////////////////////////////////////////////////////////////////////////////
//  Main program
/////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef client::calculator<iterator_type> calculator;
    typedef client::ast::program ast_program;
    typedef client::ast::eval ast_eval;

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        calculator calc;        // Our grammar
        ast_program program;    // Our program (AST)
        ast_eval eval;          // Evaluates the program

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();

        boost::spirit::ascii::space_type space;
        bool r = phrase_parse(iter, end, calc, space, program);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "\nResult: " << eval(program) << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}

I want to print the variables (not their values) when they are matched by the symbol table (declared in the grammar).

So for example when the input is

var* 2 - 3 +x*var2 - 2

the output should be:

var
x
var2

any help?

Motaz
  • 300
  • 1
  • 2
  • 17

2 Answers2

2

The AST used doesn't store the original variable referenced.

Hence after parsing the information is no longer available (the AST just contains value nodes instead of the original reference).

There are two ways about this:

  • enrich the AST so you resolve variables at evaluation time only (keeping the variable reference names)

    UPDATE I have added another answer that actually implements this, more elaborate, approach

  • have the parser collect variable references "out-of-band" during parsing.

The latter requires vastly smaller effort (if you know the tricks of Spirit + Phoenix). So let's show that:

        factor =
            doubleParser_
            | variable
            | '(' > expression > ')'
            | (char_('-') > factor)
            | (char_('+') > factor)
            ;

Here I replaced the symbolTable by a new rule: variable:

    qi::rule<Iterator, double()> variable; // NOTE: also made it a lexeme (no skipper)

That rule still exposes just the value but as a side effect we will have it collect the reference into a set of variable names:

        variable %=  
               &qi::as_string[qi::raw[symbolTable]] 
                     [ px::insert(px::ref(collect_references), qi::_1) ] 
            >> symbolTable
            ;

As you can see, it is a quick-and-dirty approach leveraging a lot of Spirit tricks (operator%= auto-rule assignment, qi::raw and qi::as_string directives, phoenix::insert and the second parse by using the positive look-ahead assertion (operator&).

Now, we just need to pass in a collect_references container to the grammar, and we can print the references after successful parsing:

    std::set<std::string> collected_references;
    calculator calc(collected_references); // Our grammar

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "References: ";
        std::copy(collected_references.begin(), collected_references.end(),
                std::ostream_iterator<std::string>(std::cout, " "));

        std::cout << "\nResult: " << eval(program) << std::endl;
        std::cout << "-------------------------\n";
    }

It prints:

Type an expression...or [q or Q] to quit

var* 2 - 3 +x*var2 - 2
-------------------------
Parsing succeeded
References: var var2 x 
Result: 80
-------------------------
Bye... :-) 

DEMO CODE

Live On Coliru

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/variant/apply_visitor.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/foreach.hpp>

#include <iostream>
#include <string>
#include <set>

namespace client {
    namespace ast
    {
        struct nil {};
        struct signed_;
        struct program;

        typedef boost::variant<
            nil
            , double
            , boost::recursive_wrapper<signed_>
            , boost::recursive_wrapper<program>
        >
        operand;

        struct signed_
        {
            char sign;
            operand operand_;
        };

        struct operation
        {
            char operator_;
            operand operand_;
        };

        struct program
        {
            operand first;
            std::list<operation> rest;
        };
    }
}

BOOST_FUSION_ADAPT_STRUCT(
    client::ast::signed_,
    (char, sign)
    (client::ast::operand, operand_)
    )

    BOOST_FUSION_ADAPT_STRUCT(
    client::ast::operation,
    (char, operator_)
    (client::ast::operand, operand_)
    )

    BOOST_FUSION_ADAPT_STRUCT(
    client::ast::program,
    (client::ast::operand, first)
    (std::list<client::ast::operation>, rest)
    )

namespace client {
    namespace ast
    {
        struct eval
        {
            typedef double result_type;

            double operator()(nil) const { BOOST_ASSERT(0); return 0; }
            double operator()(double n) const { return n; }

            double operator()(operation const& x, double lhs) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.operator_)
                {
                case '+': return lhs + rhs;
                case '-': return lhs - rhs;
                case '*': return lhs * rhs;
                case '/': return lhs / rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(signed_ const& x) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.sign)
                {
                case '-': return -rhs;
                case '+': return +rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(program const& x) const
            {
                double state = boost::apply_visitor(*this, x.first);
                BOOST_FOREACH(operation const& oper, x.rest)
                {
                    state = (*this)(oper, state);
                }
                return state;
            }
        };
    }
}

namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    template <typename Iterator>
    struct calculator : qi::grammar<Iterator, ast::program(), ascii::space_type>
    {
        calculator(std::set<std::string>& collect_references) : calculator::base_type(expression)
        {
            qi::char_type char_;
            qi::double_type doubleParser_;

            symbolTable.add("var1", 2);
            symbolTable.add("var2", 15);
            symbolTable.add("var4", 5);
            symbolTable.add("var",  5);
            symbolTable.add("x",    5);

            namespace px = boost::phoenix;

            expression =
                term
                >> *((char_('+') > term)
                 |  (char_('-') > term)
                )
                ;

            term =
                factor
                >> *((char_('*') > factor)
                | (char_('/') > factor)
                )
                ;

            variable %=  
                   &qi::as_string[qi::raw[symbolTable]] 
                         [ px::insert(px::ref(collect_references), qi::_1) ] 
                >> symbolTable
                ;

            factor =
                doubleParser_
                | variable
                | ('(' > expression > ')')
                | (char_('-') > factor)
                | (char_('+') > factor)
                ;
        }
      private:
        qi::symbols<char, double> symbolTable;
        qi::rule<Iterator, double()> variable; // NOTE: also made it a lexeme (no skipper)
        qi::rule<Iterator, ast::program(), ascii::space_type> expression;
        qi::rule<Iterator, ast::program(), ascii::space_type> term;
        qi::rule<Iterator, ast::operand(), ascii::space_type> factor;
    };
}

/////////////////////////////////////////////////////////////////////////////
//  Main program
/////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef client::calculator<iterator_type> calculator;
    typedef client::ast::program ast_program;
    typedef client::ast::eval ast_eval;

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::set<std::string> collected_references;
        calculator calc(collected_references); // Our grammar
        ast_program program;                   // Our program (AST)
        ast_eval eval;                         // Evaluates the program

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();

        boost::spirit::ascii::space_type space;
        bool r = phrase_parse(iter, end, calc, space, program);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "References: ";
            std::copy(collected_references.begin(), collected_references.end(),
                    std::ostream_iterator<std::string>(std::cout, " "));

            std::cout << "\nResult: " << eval(program) << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Posted [another answer that does the "enhanced AST" approach](http://stackoverflow.com/a/32501979/85371). See the [recorded livestream](https://www.livecoding.tv/video/enriching-ast-for-variable-references-vs-qd/) in case you're interested in how I go about these answers. ([experiment](http://chat.stackoverflow.com/transcript/10?m=24182469#24182469)) – sehe Sep 10 '15 at 12:33
  • sehe, you seem to be very knowledgeable about Boost. Wondering if you could check out a question I asked recently. I had not other way to get a hold of you... – Dylan_Larkin Nov 02 '15 at 19:13
  • 1
    @Dylan_Larkin Even I take breaks to eat and put the kids to bed :) 2 hours is not long... – sehe Nov 02 '15 at 19:16
  • sehe, I understand. Well, if you get a chance, here it is for quick reference: http://stackoverflow.com/questions/33483137/boost-semantic-actions-causing-parsing-issues – Dylan_Larkin Nov 02 '15 at 19:21
  • @Dylan_Larkin I obviously found it :). I'm doing something else first. Join me on https://www.livecoding.tv/sehe/ if you want to be sure to know as soon as possible :) – sehe Nov 02 '15 at 19:22
2

The first, more involved approach described here modifies the AST to represent the variable references.

This has multiple advantages:

  • the richer information allows you to do e.g. collection of variable references after the parse
  • the delayed evaluation of variables allows reuse of the same AST with a different set of values for the variables! This is more like how actual interpreters work.

Let's start:

  1. Modifying the AST:

    We want to be able to have a variable name in addition to the existing types of expressions:

    typedef boost::variant<
        nil
        , std::string // THIS LINE ADDED
        , double
        , boost::recursive_wrapper<signed_>
        , boost::recursive_wrapper<program>
    >
    operand;
    
  2. Modifying the rule:

    Wwe want to keep the name of the variable instead of immediately replacing it by a fixed value:

        factor =
              qi::double_
            | qi::as_string[qi::raw[symbolTable]] // THIS LINE CHANGED
            | '(' > expression > ')'
            | (char_('-') > factor)
            | (char_('+') > factor)
            ;
    
  3. Modifying the evalation visitor

    Now we need to "replace" the variables by their value during evaluation.

    Let's add a simple overload to the visitor:

    double operator()(std::string const& var) const { return symbols.at(var);    } 
    

    We have given the visitor a reference to a map symbols for lookup:

    std::map<std::string, double>& symbols;
    eval(std::map<std::string, double>& symbols) : symbols(symbols) {}
    
  4. Calling it from main:

    So we need to have a map of variables handy:

    std::map<std::string, double> symbols { {"var1", 2}, {"var2", 15}, {"var4", 5}, {"var", 5}, {"x", 5} };
    

    And pass a reference to the visitor:

    ast_eval eval(symbols) ; // Evaluates the program
    

At this point, the program operates exactly as the original program, but with an enriched AST:

Live On Coliru

Printing

-------------------------
Parsing succeeded

Result: 80
-------------------------

Enumerating variables referenced!

The point of the story becomes simple now: we just define another visitor like eval to extract references:

namespace client { namespace ast {
    struct extract_refs : boost::static_visitor<void>
    {
        std::set<std::string>& _references;

        extract_refs(std::set<std::string>& refs) : _references(refs) {}

        void operator()(std::string const& var) const { _references.insert(var);                 } 
        void operator()(operation const& x) const     { boost::apply_visitor(*this, x.operand_); } 
        void operator()(signed_ const& x) const       { boost::apply_visitor(*this, x.operand_); } 
        void operator()(program const& x) const       {
            boost::apply_visitor(*this, x.first);
            BOOST_FOREACH(operation const& oper, x.rest) (*this)(oper);
        }

        // ignore anything else
        template <typename Other> void operator()(Other const&) const {}
    };
} }

This can simply be applied to the AST:

std::set<std::string> references;
client::ast::extract_refs extract(references);

extract(program);

std::cout << "References: ";
std::copy(references.begin(), references.end(), std::ostream_iterator<std::string>(std::cout, " "));

And the output once again becomes

-------------------------
Parsing succeeded
References: var var2 x 
Result: 80
-------------------------

Quod Erat Demonstrandum.

Live On Coliru

Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Thank you very much, that's really great. However, just a simple note, you have forgotten to include map in the mentioned code so when you try to compile the code in VS2013 you will get a compiler error. Again, thank you very much this is greatly helpful. – Motaz Sep 10 '15 at 12:49