5

I ran into an odd issue writing a parser using Spirit Qi: I have a bug somewhere that is causing crashes with -O optimizations, but isn't with no optimizations. It crashes inside the constructor of the grammar:

template <typename Iterator>
struct math_expression_grammar : qi::grammar<Iterator, std::string()>
{
    qi::rule<Iterator, std::string()>
        expression,
        term,
        factorial,
        factor,
        pexpression,
        pfactor,
        nfactor,
        number;

    math_expression_grammar():
        math_expression_grammar::base_type(expression)
    {
        using namespace boost::spirit;
        using namespace boost::spirit::ascii;
        namespace sp = boost::spirit;
        namespace ph = boost::phoenix;

        auto sum = term[_val = sp::_1] >> lit('+') >> term[_val += sp::_1, _val += "+ "];
        auto difference = term[_val = sp::_1] >> lit('-') >> term[_val += sp::_1, _val += "- "];
        auto product = factor[_val = sp::_1] >> lit('*') >> factor[_val += sp::_1, _val += "* "];
        auto dividend = factor[_val = sp::_1] >> lit('/') >> factor[_val += sp::_1, _val += "/ "];

        expression = sum |
                     difference |
                     term;

        term = product |
               dividend |
               factor;

        pfactor = factorial.alias();
        nfactor = (lit('-') >> pfactor)[_val = sp::_1 + "n "];
        factor = nfactor | pfactor;

        pexpression = lit('(') >> expression >> lit(')');

        factorial = (pexpression | number)[_val = sp::_1] >> -lit('!')[_val += "! "];

        number = sp::double_[_val = ph::bind(stringize<double>, sp::_1) + ' '];
    }
};

I got to test it on TDM GCC 4.8.2 on Windows 64 bit, and GCC 4.9.0 on Arch Linux 64 bit; both had the same issue. Here's the relevant portion of the Valgrind trace, with optimizations turned on:

==15671== Use of uninitialised value of size 8
==15671==    at 0x4040DA: void boost::spirit::qi::rule<__gnu_cxx::__normal_iterator<char*, std::string>, std::string (), boost::spirit::unused_type, boost::spirit::unused_type, boost::spiri
t::unused_type>::define<mpl_::bool_<false>, boost::proto::exprns_::expr<boost::proto::tagns_::tag::bitwise_or, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tagns_:
:tag::bitwise_or, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tagns_::tag::shift_right, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tag
ns_::tag::shift_right, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tagns_::tag::subscript, boost::proto::argsns_::list2<boost::spirit::qi::rule<__gnu_cxx::__norma
l_iterator<char*, std::string>, std::string (), boost::spirit::unused_type, boost::spirit::unused_type, boost::spirit::unused_type>&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::
terminal, boost::proto::argsns_::term<boost::phoenix::actor<boost::phoenix::composite<boost::phoenix::assign_eval, boost::fusion::vector<boost::spirit::attribute<0>, boost::spirit::argument
<0>, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_> > > cons
t&>, 0l> >, 2l> const&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::terminal, boost::proto::argsns_::term<boost::spirit::terminal_ex<boost::spirit::tag::lit, boost::fusion::vecto
r1<char> > >, 0l> const&>, 2l> const&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::subscript, boost::proto::argsns_::list2<boost::spirit::qi::rule<__gnu_cxx::__normal_iterator<ch
ar*, std::string>, std::string (), boost::spirit::unused_type, boost::spirit::unused_type, boost::spirit::unused_type>&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::terminal, boo
st::proto::argsns_::term<boost::phoenix::actor<boost::phoenix::composite<boost::phoenix::sequence_eval, boost::fusion::vector<boost::phoenix::composite<boost::phoenix::plus_assign_eval, boo
st::fusion::vector<boost::spirit::attribute<0>, boost::spirit::argument<0>, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boo
st::fusion::void_, boost::fusion::void_, boost::fusion::void_> >, boost::phoenix::composite<boost::phoenix::plus_assign_eval, boost::fusion::vector<boost::spirit::attribute<0>, boost::phoen
ix::value<char const*>, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusi
on::void_> >, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_>
 > > const&>, 0l> >, 2l> const&>, 2l>&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::shift_right, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tagns_::ta
g::shift_right, boost::proto::argsns_::list2<boost::proto::exprns_::expr<boost::proto::tagns_::tag::subscript, boost::proto::argsns_::list2<boost::spirit::qi::rule<__gnu_cxx::__normal_itera
tor<char*, std::string>, std::string (), boost::spirit::unused_type, boost::spirit::unused_type, boost::spirit::unused_type>&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::termina
l, boost::proto::argsns_::term<boost::phoenix::actor<boost::phoenix::composite<boost::phoenix::assign_eval, boost::fusion::vector<boost::spirit::attribute<0>, boost::spirit::argument<0>, bo
ost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_, boost::fusion::void_> > > const&>, 0l
> >, 2l> const&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::terminal, boost::proto::argsns_::term<boost::spirit::terminal_ex<boost::spirit::tag::lit, boost::fusion::vector1<char
> > >, 0l> const&>, 2l> const&, boost::proto::exprns_::expr<boost::proto::tagns_::tag::subscript, boost::proto::argsns_::list2<boost::spirit::
==15671==    by 0x404BD9: math_expression_grammar<__gnu_cxx::__normal_iterator<char*, std::string> >::math_expression_grammar() (in /home/collin/programming/parser/parser)
==15671==    by 0x401E6A: main (in /home/collin/programming/parser/parser)
==15671==  Uninitialised value was created by a stack allocation
==15671==    at 0x404672: math_expression_grammar<__gnu_cxx::__normal_iterator<char*, std::string> >::math_expression_grammar() (in /home/collin/programming/parser/parser)

and here's the entire log with optimizations turned off:

==15686== Memcheck, a memory error detector
==15686== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==15686== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==15686== Command: ./parser
==15686==

==15686==
==15686== FILE DESCRIPTORS: 3 open at exit.
==15686== Open file descriptor 2: /dev/pts/5
==15686==    <inherited from parent>
==15686==
==15686== Open file descriptor 1: /dev/pts/5
==15686==    <inherited from parent>
==15686==
==15686== Open file descriptor 0: /dev/pts/5
==15686==    <inherited from parent>
==15686==
==15686==
==15686== HEAP SUMMARY:
==15686==     in use at exit: 0 bytes in 0 blocks
==15686==   total heap usage: 14 allocs, 14 frees, 776 bytes allocated
==15686==
==15686== All heap blocks were freed -- no leaks are possible
==15686==
==15686== For counts of detected and suppressed errors, rerun with: -v
==15686== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)

There was no change in any of my code between the two tests.

I'm having trouble actually debugging the issue since the bug strikes only with -O optimizations or higher, but not -Og unfortunately. I do have a suspicion the bug MAY be in Boost.Spirit, but I am also very unsure; I don't see anything wrong in my code, but I may be missing something or using Spirit wrong. Can someone more experienced point me in the right direction?

Here's the entire compile-able code:

#include <iostream>
#include <string>
#include <vector>
#include <stack>
#include <algorithm>
#include <utility>

#include <boost/mpl/vector.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_real.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace qi = boost::spirit::qi;

template <typename NumType, typename Iterator>
NumType inline parse_number(Iterator first, Iterator last)
{
    using namespace std;

    istringstream extractor(string(first, last));
    NumType num;
    extractor >> num;

    return num;
}

template <typename NumType = double, typename Iterator>
NumType eval_rpn(Iterator head, Iterator last)
{
    using namespace std;

    const char tokens[] = {'+', '-', '*', '/', '^', 'n', ' '};
    auto tokens_begin = begin(tokens), tokens_end = end(tokens);

    stack<NumType> num_stack;

    while(head != last)
    {
        auto next = find_first_of(head, last, tokens_begin, tokens_end);

        if(head != next) num_stack.push(parse_number<NumType>(head, next));

        if(next != last)
        {
            NumType temp;

            switch(*next)
            {
            case '+':
                temp = num_stack.top();
                num_stack.pop();
                num_stack.top() += temp;
                break;

            case '-':
                temp = num_stack.top();
                num_stack.pop();
                num_stack.top() -= temp;
                break;

            case '*':
                temp = num_stack.top();
                num_stack.pop();
                num_stack.top() *= temp;
                break;

            case '/':
                temp = num_stack.top();
                num_stack.pop();
                num_stack.top() /= temp;
                break;

            case '^':
                temp = num_stack.top();
                num_stack.pop();
                num_stack.top() = pow(num_stack.top(), temp);
                break;

            case 'n':
                num_stack.top() = -num_stack.top();
                break;

            default:
                break; // Do nothing
            }

            head = next+1;
        }
        else head = last;
    }

    return num_stack.top();
}

template <typename T>
std::string stringize(T x)
{
    return std::to_string(x);
}

template <typename Iterator>
struct math_expression_grammar : qi::grammar<Iterator, std::string()>
{
    qi::rule<Iterator, std::string()>
        expression,
        term,
        factorial,
        factor,
        pexpression,
        pfactor,
        nfactor,
        number;

    math_expression_grammar():
        math_expression_grammar::base_type(expression)
    {
        using namespace boost::spirit;
        using namespace boost::spirit::ascii;
        namespace sp = boost::spirit;
        namespace ph = boost::phoenix;

        auto sum = term[_val = sp::_1] >> lit('+') >> term[_val += sp::_1, _val += "+ "];
        auto difference = term[_val = sp::_1] >> lit('-') >> term[_val += sp::_1, _val += "- "];
        auto product = factor[_val = sp::_1] >> lit('*') >> factor[_val += sp::_1, _val += "* "];
        auto dividend = factor[_val = sp::_1] >> lit('/') >> factor[_val += sp::_1, _val += "/ "];

        expression = sum |
                     difference |
                     term;

        term = product |
               dividend |
               factor;

        pfactor = factorial.alias();
        nfactor = (lit('-') >> pfactor)[_val = sp::_1 + "n "];
        factor = nfactor | pfactor;

        pexpression = lit('(') >> expression >> lit(')');

        factorial = (pexpression | number)[_val = sp::_1] >> -lit('!')[_val += "! "];

        number = sp::double_[_val = ph::bind(stringize<double>, sp::_1) + ' '];
    }
};

int main()
{
    using namespace std;

    math_expression_grammar<string::iterator> g;

    string input;
    getline(cin, input);
    while(input.size())
    {
        auto first = input.begin(), last = input.end();
        cout << input << endl;

        string result;
        if(!boost::spirit::qi::parse(first, last, g, result))
        {
            cout << "Error at " << last - first << ":\n\t" << *first << endl;
        }
        else
        {
            cout << result << endl;
            cout << eval_rpn(result.begin(), result.end()) << endl;
        }

        getline(cin, input);
    }
}
chbaker0
  • 1,758
  • 2
  • 13
  • 27
  • @MattMcNabb It crashes upon starting the program (as that's when the math_expression_grammar class is constructed), there are no inputs. Unless you meant something else? – chbaker0 Jun 20 '14 at 00:26
  • 1
    There's no `main()` in the code you posted. "testcase" means code that someone else can compile without making any changes, and reproduce your behaviour. That guarantees that we're both looking at the same thing. – M.M Jun 20 '14 at 00:39
  • @MattMcNabb OK, got it; I added it. – chbaker0 Jun 20 '14 at 00:42
  • Is there any sample input/expected output? I can't tell what this is supposed to do. Why is there a semantic action every square inch? ("[Semantic actions are evil?](http://stackoverflow.com/questions/8259440/boost-spirit-semantic-actions-are-evil/8259585#8259585)"). Why do all rules expose `std::string`? How would you handle whitespace? – sehe Jun 20 '14 at 06:57
  • @sehe it's a work in progress – chbaker0 Jun 20 '14 at 13:32
  • @mebob that's okay, it just means I cannot fix it for you, but you already have the answer, so you can fix it yourself :) (there might be more details at play, but I would need to be able to do testruns and not knowing any valid input/output I can't see these.) – sehe Jun 20 '14 at 13:33

1 Answers1

4

You cannot (safely) use auto on Spirit expression templates. That leads to UB.

Will post a fixed version soonish(awaiting information). Meanwhile, see:

Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Thank you, that fixed it easily. The only reason why I have so many semantic actions right now is because I'm "debugging" the grammar – chbaker0 Jun 21 '14 at 16:29