Boost phoenix variadic function parse

Question

I have the parser code below for a function "TakeOne". TakeOne returns the first parameter that is not equal to '%null%'. For example:

TakeOne( %null% , '3', 'defaultVal'); -->  result = 3
TakeOne( 5 , 'asd', 'defaultVal');   -> result = 5

Now I want to modify this function to

TakeOne(parm1, parm2, ... , defaultValue);

Is it possible to do this without using C++11 features? Thanks

#include <locale.h>

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include "sstream"

#define BOOST_SPIRIT_USE_PHOENIX_V3

#include <boost/functional/hash.hpp>
#include <boost/variant.hpp>
#include <boost/smart_ptr.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/tokenizer.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/phoenix/function/adapt_function.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/math/constants/constants.hpp>
#include <boost/math/special_functions/round.hpp>
#include <boost/exception/diagnostic_information.hpp> 
#include <boost/algorithm/string.hpp> 

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;

typedef double NumValue;
typedef boost::variant<double, std::wstring> GenericValue;

const std::wstring ParserNullChar = L"%null%";
const double NumValueDoubleNull = std::numeric_limits<double>::infinity();

// Visitor that yields the numeric form of whichever alternative a GenericValue holds.
struct AsNumValue : boost::static_visitor<double>
{
    // A stored number is handed back unchanged.
    double operator()(double number) const
    {
        return number;
    }

    // A stored string is either the null marker (compared case-insensitively
    // against ParserNullChar), which maps to NumValueDoubleNull, or it is
    // converted with boost::lexical_cast<double>. Any exception raised by the
    // conversion propagates to the caller.
    double operator()(std::wstring const& text) const
    {
        const bool isNullMarker = boost::iequals(text, ParserNullChar);
        return isNullMarker ? NumValueDoubleNull
                            : boost::lexical_cast<double>(text);
    }
};

// Returns the numeric form of val by dispatching on its stored alternative
// through the AsNumValue visitor.
double Num(GenericValue const& val)
{
    const AsNumValue toNumber = AsNumValue();
    return boost::apply_visitor(toNumber, val);
}

// Reports whether num equals the NumValueDoubleNull sentinel.
bool CheckIfNumValueIsNull(double num)
{
    return num == NumValueDoubleNull;
}


// Reports whether val stands for null. That is the case when its streamed
// text equals ParserNullChar (ignoring case), or when it does not hold the
// string alternative and its numeric form is the null sentinel.
bool CheckIfGenericValIsNull(const GenericValue& val)
{
    std::wostringstream rendered;
    rendered << val;

    const bool printsAsNull = boost::iequals(rendered.str(), ParserNullChar);
    if (printsAsNull)
        return true;

    // which() == 1 selects the std::wstring alternative of GenericValue;
    // only non-string values are compared against the numeric sentinel.
    const bool holdsString = (val.which() == 1);
    return !holdsString && CheckIfNumValueIsNull(Num(val));
}


// Returns val1 if it is not null, otherwise val2 if that is not null,
// otherwise the default def.
GenericValue TakeOne(GenericValue val1, GenericValue val2, GenericValue def)
{
    const bool firstIsNull = CheckIfGenericValIsNull(val1);
    if (!firstIsNull)
        return val1;

    const bool secondIsNull = CheckIfGenericValIsNull(val2);
    if (!secondIsNull)
        return val2;

    return def;
}



BOOST_PHOENIX_ADAPT_FUNCTION(GenericValue, TakeOne_, TakeOne, 3)

// Qi grammar whose start rule is expr_ and whose synthesized attribute is a
// GenericValue. It accepts the null literal, numbers, quoted strings and
// (possibly nested) three-argument TakeOne(...) calls.
template <typename It, typename Skipper = qi::space_type >
struct MapFunctionParser : qi::grammar<It, GenericValue(), Skipper>
{
    MapFunctionParser() : MapFunctionParser::base_type(expr_)
    {
        using namespace qi;

        // Case-insensitive keyword followed by exactly three comma-separated
        // expr_ arguments in parentheses. '>' is Qi's expectation operator, so
        // once the keyword has matched, a malformed argument list is an error
        // rather than a silent non-match. The three argument attributes are
        // passed to TakeOne_ and its result becomes the rule's value.
        function_call_ = 
          (no_case[L"TakeOne"] > '(' > expr_ > ',' > expr_ > ',' > expr_ > ')') 
            [_val = TakeOne_(_1, _2, _3) ];


        // Text enclosed in double quotes or in single quotes; the quotes
        // themselves are not part of the attribute. string_ is declared
        // without a skipper (see the member declaration below).
        string_ =   (L'"' > *~char_('"') > L'"')
            | (L"'" > *~char_("'") > L"'"); 


        // Alternatives are tried in order: the null literal (stored as the
        // numeric sentinel NumValueDoubleNull), a double, a quoted string,
        // or a function call.
        factor_ =
    (no_case[ParserNullChar])     [_val = NumValueDoubleNull]
        |   double_                    [ _val = _1]
        |   string_              [ _val = _1]
        |   function_call_       [ _val = _1]
        ;


        expr_ = factor_;

        // On an expectation failure inside expr_, print what was expected (_4)
        // and the input from the error position (_3) to the end (_2), then
        // make the parse fail.
        // NOTE(review): this writes to the narrow std::cout while main() prints
        // the result through std::wcout.
        on_error<fail> ( expr_, std::cout
            << phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
            << phx::construct<std::string>(_3, _2) << phx::val("\"\n"));

        // Rule tracing is only registered when _DEBUG is defined.
#ifdef _DEBUG 
        BOOST_SPIRIT_DEBUG_NODE(function_call_);
        BOOST_SPIRIT_DEBUG_NODE(expr_);
        BOOST_SPIRIT_DEBUG_NODE(string_);
        BOOST_SPIRIT_DEBUG_NODE(factor_);
#endif
    }

private:
    // No Skipper template argument: this rule is declared skipper-less.
    qi::rule<It, std::wstring()> string_;
    // These rules use the grammar's skipper and all produce a GenericValue.
    qi::rule<It, GenericValue(), Skipper> function_call_, expr_, factor_;

};


// Parses one hard-coded TakeOne expression and prints the evaluated result.
//
// Returns 0 when the whole expression was parsed, 1 otherwise. The parse
// status used to be ignored, so a failed parse printed a default-constructed
// GenericValue as if it were a real result.
int main()
{
    typedef std::wstring::const_iterator AttIter;
    MapFunctionParser<AttIter, boost::spirit::qi::space_type> mapFunctionParser;

    const std::wstring functionStr = L"TakeOne(%null%, 5, 'default')";
    AttIter beginExpression(functionStr.begin());
    AttIter endExpression(functionStr.end());

    GenericValue result;
    const bool ret = boost::spirit::qi::phrase_parse(
        beginExpression, endExpression, mapFunctionParser, boost::spirit::qi::space, result);

    // phrase_parse advances beginExpression past the consumed input, so
    // anything left between the two iterators is unparsed trailing text.
    if (!ret || beginExpression != endExpression)
    {
        std::wcerr << L"Parse failed; unparsed input: \""
                   << std::wstring(beginExpression, endExpression)
                   << L"\"" << std::endl;
        return 1;
    }

    std::wcout << result << std::endl;
    return 0;
}

There are several code smells in this sample. If you care I could address them at a later time. — sehe, Nov 05 '13 at 16:02
[Posted](http://stackoverflow.com/questions/19784851/boost-phoenix-variadic-function-parse/19799571#19799571). Note there are still some issues left (mixing `wcout` and `cout` is [undefined behaviour, I think](http://stackoverflow.com/a/8957069/85371)). Cheers — sehe, Nov 05 '13 at 21:46

score 1 · Accepted Answer · edited May 23 '17 at 12:17

Here's a second answer to address (some of) the XY-problem(s) you are probably trying to solve.

As I noted in a comment there are a number of code smells^[1] in your example. Let me explain what I mean.

The Goal

Let's consider what the goal of the program is: You're doing input parsing.

The result of parsing should be data, preferably in C++ datatypes with strong type information, so you can avoid working with quirky (possibly invalid) variable text representations and focus on business logic.

The Smells

Now on to the smells:

You define "abstract datatypes" (like NumValue) but then you fail to use them consistently:

typedef double NumValue;
typedef boost::variant<double, std::wstring> GenericValue; 
//                     ^--- should be NumValue

Be more consistent, and make your code reflect the design:

namespace ast {
    typedef double                         Number;
    typedef std::wstring                   String;
    typedef boost::variant<Number, String> Value;
}

You use a parser generator for the parsing, yet you are also invoking
- boost::lexical_cast<double> on ... strings
- wostringstream, from which you (forgetting std::ios::skipws...) extract "a" string
- boost::iequals to compare strings, that should already have been parsed into their strongly-typed AST types, independent of letter-case.
- You have a static_visitor to act on variant types, yet you rely on stringification (using wostringstream). In fact, you only ever call the visitor on that variant iff you already know that it's a number:
```
else if(val.which() != 1 && CheckIfNumValueIsNull(Num(val)))
```
  That's a bit funny, because in this case you could just have used boost::get<NumValue>(val) to get the known-type value out.
Pro Tip: Using "low-level" parsing/streaming operations while using a high-level parser-generator is a code smell
Your generic value variant suggests that your grammar supports two kinds of values. Yet, your grammar definition clearly shows that you have a third type of value: %null%.

There's evidence that you got somewhat confused by this yourself, as we can see the parser
- parsing the literal %null% (or %NULL% etc) into ... some kind of magic number.
- therefore, we know that iff %null% was parsed, it will always be a NumValue in your AST
- we also know that strings will always be parsed into a wstring subtype GenericValue
- yet, we can see you treat any GenericValue as potentially null?
All in all it leads to a rather surprising...

Summary: You have AsNumValue which you (ironically) appear to be using to find out whether a String might actually be Null

Hint: a String could never represent the %null% to begin with, it makes no sense to transform random strings into numbers, and random 'magic numeric values' should not have been used to represent Null in the first place.
Your grammar makes unbalanced use of semantic actions:
```
factor_ =
    (no_case[ParserNullChar])     [_val = NumValueDoubleNull]
        |   double_               [ _val = _1]
        |   string_              [ _val = _1]
        |   function_call_       [ _val = _1]
        ;
```
We notice that you are simultaneously
- using SA's to manually perform what automatic attribute propagation is supposed to do ([_val = _1])
- using a single branch for "magic" purposes (this is where you require a Null AST datatype)
In my suggested solution below, the rule becomes:
```
factor_ = null_ | string_ | double_ | function_call_;
```
That's it.

Pro Tip: Use Semantic Actions sparingly (see also Boost Spirit: "Semantic actions are evil"?)

The Solution

All in all, plenty of room to simplify and cleanup. In the AST department,

Extend the Value variant with an explicit Null subtype
Rename types and move into a namespace for readability
Drop the AsNumValue function which has no purpose. Instead, have an IsNull visitor that just reports true for Null values.

namespace ast {
    // Explicit "no value" alternative, replacing magic numbers and strings.
    struct               Null {};
    typedef double       Number;
    typedef std::wstring String;

    // A parsed value holds exactly one of Null, Number or String.
    typedef boost::variant<Null, Number, String> Value;

    // Visitor answering "is this value Null?". It exposes result_type itself
    // instead of deriving from boost::static_visitor.
    struct IsNull
    {
        typedef bool result_type;

        // Only the Null alternative counts as null.
        constexpr result_type operator()(Null const&) const
        {
            return true;
        }

        // Any other argument list is not null.
        template <typename... Other>
        constexpr result_type operator()(Other const&...) const
        {
            return false;
        }
    };
}

In the Grammar department,

Factor the grammar into rules that match the AST nodes

qi::rule<It, ast::String()>         string_;
qi::rule<It, ast::Number()>         number_;
qi::rule<It, ast::Null()>           null_;
qi::rule<It, ast::Value(), Skipper> factor_, expr_, function_call_;

This makes your grammar simple to maintain and to reason about:

string_ = (L'"' > *~char_('"') > L'"')
        | (L"'" > *~char_("'") > L"'")
        ; 
number_ = double_;
null_   = no_case["%null%"] > attr(ast::Null());
factor_ = null_ | string_ | double_ | function_call_;
expr_   = factor_;

BOOST_SPIRIT_DEBUG_NODES((expr_)(factor_)(null_)(string_)(number_)(function_call_))

Note it makes debug output more informative too
I've taken the liberty to rename TakeOne to Coalesce^[2]:
```
function_call_ = no_case[L"Coalesce"] 
    > ('(' > expr_ % ',' > ')') [ _val = Coalesce_(_1) ];
```
This still uses the approach I showed in the other answer, but the implementation has become much simpler because there is no longer any confusion about what could be Null.

Take Away: Values that are Null are just... Null!

Removing the now unused header includes, and adding a load of test inputs:

// Runs the parser over a list of sample Coalesce expressions. For each one it
// prints the parse status (as true/false), a tab, and the resulting value.
int main()
{
    typedef std::wstring::const_iterator It;
    MapFunctionParser<It, boost::spirit::qi::space_type> parser;

    // Sample inputs: mixed keyword case, nulls, numbers, both quote styles and
    // one long multi-line argument list.
    const wchar_t* const samples[] = {
            L"Coalesce()",
            L"Coalesce('simple')",
            L"CoALesce(99)",
            L"CoalESCe(%null%, 'default')",
            L"coalesce(%null%, -inf)",
            L"COALESCE(%null%, 3e-1)",
            L"Coalesce(%null%, \"3e-1\")",
            L"COALESCE(%null%, 5, 'default')",
            L"COALESCE(%NULL%, %null%, %nuLl%, %NULL%, %null%, %null%, %nUll%, %null%, %NULL%, %nUll%, %null%, \n"
                    L"%NULL%, %NULL%, %null%, %null%, %nUll%, %null%, %nUll%, %nULl%, %null%, %null%, %null%, \n"
                    L"%null%, %nUll%, %NULL%, %null%, %null%, %null%, %null%, %null%, %null%, %nUll%, %nulL%, \n"
                    L"%null%, %null%, %nUll%, %NULL%, 'this is the first nonnull',    %nUll%, %null%, %Null%, \n"
                    L"%NULL%, %null%, %null%, %null%, %NULL%, %null%, %null%, %null%, %NULL%, %NULL%, %nuLl%, \n"
                    L"%null%, %null%, %nUll%, %nuLl%, 'this is the default')",
    };

    for (std::wstring input : samples)
    {
        It begin = input.begin();
        It end   = input.end();

        ast::Value result;
        const bool ret = phrase_parse(begin, end, parser, qi::space, result);

        // The value is printed even when parsing failed; in that case it is
        // whatever result held when phrase_parse returned.
        std::wcout << std::boolalpha << ret << ":\t" << result << std::endl;
    }
}

We can now test the parsing, evaluation and error-handling:

Error! Expecting <list><expr_>"," here: ")"
false:  %null%
true:   simple
true:   99
true:   default
true:   -inf
true:   0.3
true:   3e-1
true:   5
true:   this is the first nonnull

See it Live On Coliru

Without the extra test cases it takes about 77 lines of code, less than half of your original code.

Full Code

For future reference

//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/phoenix/function/adapt_function.hpp>

namespace qi  = boost::spirit::qi;
namespace phx = boost::phoenix;

namespace ast {
    typedef double       Number;
    typedef std::wstring String;

    // Explicit "no value" alternative; streams as the literal %null% on both
    // wide and narrow output streams.
    struct  Null {
        friend std:: ostream& operator<<(std:: ostream& os, Null) { return os <<  "%null%"; }
        friend std::wostream& operator<<(std::wostream& os, Null) { return os << L"%null%"; }
    };

    // A parsed value holds exactly one of Null, Number or String.
    typedef boost::variant<Null, Number, String> Value;

    // Visitor answering "is this value Null?". It exposes result_type itself
    // instead of deriving from boost::static_visitor.
    struct IsNull
    {
        typedef bool result_type;

        // Only the Null alternative counts as null.
        constexpr result_type operator()(Null const&) const
        {
            return true;
        }

        // Any other argument list is not null.
        template <typename... Other>
        constexpr result_type operator()(Other const&...) const
        {
            return false;
        }
    };

    // Returns the first non-Null element of arglist. If every element is Null,
    // the last element is returned (it acts as the default, even when Null).
    // An empty list yields Null.
    Value Coalesce(std::vector<Value> const& arglist) {
        const IsNull isNull{};

        for (Value const& candidate : arglist) {
            if (boost::apply_visitor(isNull, candidate))
                continue;
            return candidate;
        }

        if (!arglist.empty())
            return arglist.back(); // last is the default, even if Null
        return Value(Null());
    }
}

BOOST_PHOENIX_ADAPT_FUNCTION(ast::Value, Coalesce_, ast::Coalesce, 1)

// Qi grammar whose start rule is expr_ and whose synthesized attribute is an
// ast::Value. It accepts %null%, quoted strings, numbers and (possibly nested)
// Coalesce(...) calls with one or more arguments.
template <typename It, typename Skipper = qi::space_type >
struct MapFunctionParser : qi::grammar<It, ast::Value(), Skipper>
{
    MapFunctionParser() : MapFunctionParser::base_type(expr_)
    {
        using namespace qi;

        // Case-insensitive keyword, then a parenthesised list. `expr_ % ','`
        // matches one or more expr_ separated by commas, so an empty list such
        // as "Coalesce()" is rejected. The collected arguments are handed to
        // Coalesce_ and its result becomes the rule's value.
        function_call_ = no_case[L"Coalesce"] 
            > ('(' > expr_ % ',' > ')') [ _val = Coalesce_(_1) ];

        // Text enclosed in double quotes or in single quotes; the quotes
        // themselves are not part of the attribute.
        string_ =   
                (L'"' > *~char_('"') > L'"')
            | (L"'" > *~char_("'") > L"'"); 

        number_ = double_;

        // Matches the null literal in any letter case and yields ast::Null.
        null_   = no_case["%null%"] > attr(ast::Null());

        // Alternatives are tried in order; plain attribute propagation fills
        // the ast::Value, no semantic actions are needed.
        // NOTE(review): number_ is defined and registered for debugging, but
        // this rule uses double_ directly - confirm which one was intended.
        factor_ = null_ | string_ | double_ | function_call_;

        expr_   = factor_;

        // On an expectation failure inside expr_, print what was expected (_4)
        // and the input from the error position (_3) to the end (_2), then
        // make the parse fail.
        // NOTE(review): this writes to the narrow std::cout while main() prints
        // results through std::wcout.
        on_error<fail> (expr_, std::cout
            << phx::val("Error! Expecting ") << _4 << phx::val(" here: \"")
            << phx::construct<std::string>(_3, _2) << phx::val("\"\n"));

        BOOST_SPIRIT_DEBUG_NODES((expr_)(factor_)(null_)(string_)(number_)(function_call_))
    }

private:
    // Rules without a Skipper template argument are declared skipper-less.
    qi::rule<It, ast::String()>         string_;
    qi::rule<It, ast::Number()>         number_;
    qi::rule<It, ast::Null()>           null_;
    // These rules use the grammar's skipper and all produce an ast::Value.
    qi::rule<It, ast::Value(), Skipper> factor_, expr_, function_call_;
};

// Runs the parser over a list of sample Coalesce expressions. For each one it
// prints the parse status (as true/false), a tab, and the resulting value.
int main()
{
    typedef std::wstring::const_iterator It;
    MapFunctionParser<It, boost::spirit::qi::space_type> parser;

    // Sample inputs: mixed keyword case, nulls, numbers, both quote styles and
    // one long multi-line argument list.
    const wchar_t* const samples[] = {
            L"Coalesce()",
            L"Coalesce('simple')",
            L"CoALesce(99)",
            L"CoalESCe(%null%, 'default')",
            L"coalesce(%null%, -inf)",
            L"COALESCE(%null%, 3e-1)",
            L"Coalesce(%null%, \"3e-1\")",
            L"COALESCE(%null%, 5, 'default')",
            L"COALESCE(%NULL%, %null%, %nuLl%, %NULL%, %null%, %null%, %nUll%, %null%, %NULL%, %nUll%, %null%, \n"
                    L"%NULL%, %NULL%, %null%, %null%, %nUll%, %null%, %nUll%, %nULl%, %null%, %null%, %null%, \n"
                    L"%null%, %nUll%, %NULL%, %null%, %null%, %null%, %null%, %null%, %null%, %nUll%, %nulL%, \n"
                    L"%null%, %null%, %nUll%, %NULL%, 'this is the first nonnull',    %nUll%, %null%, %Null%, \n"
                    L"%NULL%, %null%, %null%, %null%, %NULL%, %null%, %null%, %null%, %NULL%, %NULL%, %nuLl%, \n"
                    L"%null%, %null%, %nUll%, %nuLl%, 'this is the default')",
    };

    for (std::wstring input : samples)
    {
        It begin = input.begin();
        It end   = input.end();

        ast::Value result;
        const bool ret = phrase_parse(begin, end, parser, qi::space, result);

        // The value is printed even when parsing failed; in that case it is
        // whatever result held when phrase_parse returned.
        std::wcout << std::boolalpha << ret << ":\t" << result << std::endl;
    }
}

^[1] Origin? http://c2.com/cgi/wiki?CodeSmell (maybe it was Kent Beck?)

^[2] Coalesce referring to corresponding functions in some programming languages

Oh, haha. Just found this: "However, using `lexical_cast` inside Boost Spirit to... parse a number (!?!) ***is a travesty***" (_in answer d.d. [May 27 at 21:01](http://stackoverflow.com/a/16780496/85371)_). I don't know what to say. "I told you so"? Nah. But I _did_ tell you before :) — sehe, Nov 05 '13 at 22:01
(note: just quoting that for my own amusement... it's alright :/ I didn't remember all that) — sehe, Nov 05 '13 at 22:02

score 0 · Answer 2 · edited May 23 '17 at 12:33

Update: I just addressed some other concerns in a second answer. Code went down to 77 lines while getting simpler and more robust (and answering your question too).

The direct answer to your Phoenix question would be yes:

// Variadic function object intended to be wrapped in boost::phoenix::function.
// The first argument is the default; the remaining arguments are candidates
// tried from left to right, and the first non-null candidate wins.
struct TakeOne
{
    // Nested result metafunction: the call result is always GenericValue
    // (presumably consumed by Phoenix's result-type deduction).
    template <typename...> struct result { typedef GenericValue type; };

    template <typename... Values>
    GenericValue operator()(Values const&... values) const
    {
        return first(values...);
    }

private:
    // Base case: no candidates remain, so the default is the answer.
    GenericValue first(GenericValue const& def) const
    {
        return def;
    }

    // Recursive case: yield head unless it is null, otherwise continue with
    // the remaining candidates while carrying the default along.
    template <typename... Rest>
    GenericValue first(GenericValue const& def, GenericValue const& head, Rest const&... tail) const
    {
        if (!CheckIfGenericValIsNull(head))
            return head;
        return first(def, tail...);
    }
};

static const boost::phoenix::function<TakeOne> TakeOne_;

And it would behave largely the same (although you'd need to pass the default as the first argument):

// One alternative per supported argument count; the last argument is the
// default and is therefore passed to TakeOne_ first.
//
// Inside the alternatives the elements are sequenced with '>>' rather than the
// expectation operator '>'. With '>' the first alternative would raise an
// expectation failure on seeing ',' where it wants ')', so the longer
// alternatives could never be tried. Only the keyword is followed by '>', so
// that an argument list matching none of the alternatives is still reported
// as an error. The four-argument form also gets the comma that was missing
// between its last two arguments.
function_call_ = no_case[L"TakeOne"] > (
        ('(' >> expr_ >> ',' >> expr_ >> ')'                                ) [_val=TakeOne_(_2,_1)]
      | ('(' >> expr_ >> ',' >> expr_ >> ',' >> expr_ >> ')'                ) [_val=TakeOne_(_3,_1,_2)]
      | ('(' >> expr_ >> ',' >> expr_ >> ',' >> expr_ >> ',' >> expr_ >> ')') [_val=TakeOne_(_4,_1,_2,_3)]
      // ... etc
      );

However, as you can see, it's not so flexible! Variadics are likely not what you wanted, because variadics imply statically known numbers of arguments. Something that depends on runtime input (being parsed) can never fit into the 'statically known' category. So I'd suggest this:

    function_call_ = 
        no_case[L"TakeOne"] > 
        ('(' > expr_ % ',' > ')') [_val=TakeOne_(_1)];

So you're passing a std::vector<GenericValue> instead. Now, TakeOne becomes a breeze:

// Returns the first argument that is not null. If every argument is null, the
// last one is returned, since it acts as the default. arglist must not be
// empty (checked with assert).
GenericValue TakeOne(std::vector<GenericValue> const& arglist) {
    assert(!arglist.empty());

    typedef std::vector<GenericValue>::const_iterator ArgIter;
    for (ArgIter arg = arglist.begin(); arg != arglist.end(); ++arg) {
        if (CheckIfGenericValIsNull(*arg))
            continue;
        return *arg;
    }

    return arglist.back(); // last is the default
}
BOOST_PHOENIX_ADAPT_FUNCTION(GenericValue, TakeOne_, TakeOne, 1)

Bonus:

To simplify, here's the visitor re-imagined:

// Visitor deciding whether a GenericValue alternative represents null.
struct IsNull : boost::static_visitor<bool> {
    // A string is null when it equals ParserNullChar, ignoring case.
    bool operator()(std::wstring const& text) const {
        const bool matchesMarker = boost::iequals(text, ParserNullChar);
        return matchesMarker;
    }

    // A number is null when it equals the NumValueDoubleNull sentinel.
    bool operator()(double value) const {
        const bool isSentinel = (value == NumValueDoubleNull);
        return isSentinel;
    }
};

// Returns the first argument for which the IsNull visitor reports false. If
// every argument is null, the last one is returned, since it acts as the
// default. arglist must not be empty (checked with assert).
GenericValue TakeOne(std::vector<GenericValue> const& arglist) {
    assert(!arglist.empty());

    typedef std::vector<GenericValue>::const_iterator ArgIter;
    for (ArgIter arg = arglist.begin(); arg != arglist.end(); ++arg) {
        const bool isNull = boost::apply_visitor(IsNull(), *arg);
        if (isNull)
            continue;
        return *arg;
    }

    return arglist.back(); // last is the default
}
BOOST_PHOENIX_ADAPT_FUNCTION(GenericValue, TakeOne_, TakeOne, 1)

That "fix" alone saves you ~51 LoC (112 vs. 163). See it Live on Coliru