I have a very simple path construct that I am trying to parse with boost spirit.lex.
We have the following grammar:
token := [a-z]+
path := (token : path) | (token)
So we're just talking about colon separated lower-case ASCII strings here.
I have three examples "xyz", "abc:xyz", "abc:xyz:".
The first two should be deemed valid. The third one, which has a trailing colon, should not be deemed valid. Unfortunately the parser I have recognizes all three as being valid. The grammar should not allow an empty token, but apparently spirit is doing just that. What am I missing to get the third one rejected?
Also, if you read the code below, in comments there is another version of the parser that demands that all paths end with semi-colons. I can get appropriate behavior when I activate those lines, (i.e. rejection of "abc:xyz:;"), but this is not really what I want.
Anyone have any ideas?
Thanks.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using boost::phoenix::val;
template<typename Lexer>
struct PathTokens : boost::spirit::lex::lexer<Lexer>
{
PathTokens()
{
identifier = "[a-z]+";
separator = ":";
this->self.add
(identifier)
(separator)
(';')
;
}
boost::spirit::lex::token_def<std::string> identifier, separator;
};
template <typename Iterator>
struct PathGrammar
: boost::spirit::qi::grammar<Iterator>
{
template <typename TokenDef>
PathGrammar(TokenDef const& tok)
: PathGrammar::base_type(path)
{
using boost::spirit::_val;
path
=
(token >> tok.separator >> path)[std::cerr << _1 << "\n"]
|
//(token >> ';')[std::cerr << _1 << "\n"]
(token)[std::cerr << _1 << "\n"]
;
token
= (tok.identifier) [_val=_1]
;
}
boost::spirit::qi::rule<Iterator> path;
boost::spirit::qi::rule<Iterator, std::string()> token;
};
int main()
{
typedef std::string::iterator BaseIteratorType;
typedef boost::spirit::lex::lexertl::token<BaseIteratorType, boost::mpl::vector<std::string> > TokenType;
typedef boost::spirit::lex::lexertl::lexer<TokenType> LexerType;
typedef PathTokens<LexerType>::iterator_type TokensIterator;
typedef std::vector<std::string> Tests;
Tests paths;
paths.push_back("abc");
paths.push_back("abc:xyz");
paths.push_back("abc:xyz:");
/*
paths.clear();
paths.push_back("abc;");
paths.push_back("abc:xyz;");
paths.push_back("abc:xyz:;");
*/
for ( Tests::iterator iter = paths.begin(); iter != paths.end(); ++iter )
{
std::string str = *iter;
std::cerr << "*****" << str << "*****\n";
PathTokens<LexerType> tokens;
PathGrammar<TokensIterator> grammar(tokens);
BaseIteratorType first = str.begin();
BaseIteratorType last = str.end();
bool r = boost::spirit::lex::tokenize_and_parse(first, last, tokens, grammar);
std::cerr << r << " " << (first==last) << "\n";
}
}