I wrote the following code, which crashes with Boost 1.78.
However, if I replace std::string input = "geo_dip_subdivision:(+国 -民)";
with std::string input = "geo_dip_subdivision:(+1 -2)";
it runs as expected.
It also runs as expected with Boost 1.67 and std::string input = "geo_dip_subdivision:(+国 -民)";
So the problem seems to be related to Unicode, but I don't know what the problem is, or why the code appears to run as expected with Boost 1.67.
Can anyone help?
#include <string.h>

#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#define BOOST_SPIRIT_UNICODE
#include <boost/phoenix.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/spirit/include/qi.hpp>
namespace DB {
using std::vector;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
// Quoted string: an opening '"', then any run of escaped quotes (\") or
// non-quote characters, then the closing '"'.
// The escape alternative has to live INSIDE the repetition and come first:
// `>` binds tighter than `|`, so writing `'"' > *(char_ - '"') | "\\\""` would
// make the escape an alternative to the whole opening sequence instead, and a
// backslash would otherwise be swallowed by the generic character before the
// following quote could be recognised as escaped.
#define str_pattern ('"' > *("\\\"" | (qi::unicode::char_ - '"')) > '"')
// Opening / closing delimiter of a range value: '[' or '{', and ']' or '}'.
// Both expand to an unqualified `char_`, so they can only be used where the
// qi names are in scope (e.g. after `using namespace qi;`).
#define sym_open (char_('[') | char_('{'))
#define sym_close (char_(']') | char_('}'))
struct query_tree;
typedef boost::variant<std::string, query_tree> node;
struct query_tree {
vector<node> must;
vector<node> must_not;
vector<node> should;
query_tree() = default;
query_tree(int type, query_tree& old, const query_tree& v)
{
if (type == 3) {
assert(old.should.size() == 0 || old.must.size() == 0);
assert(v.should.size() + v.must.size() + v.must_not.size() == 1);
if (old.should.size() > 0) {
must = std::move(old.should);
} else {
must = std::move(old.must);
}
must_not = std::move(old.must_not);
if (v.should.size() > 0) {
must.push_back(v.should[0]);
} else if (v.must.size() > 0) {
must.push_back(v.must[0]);
} else {
must_not.push_back(v.must_not[0]);
}
} else {
must = std::move(old.must);
must_not = std::move(old.must_not);
should = std::move(old.should);
push_back(type, v);
}
}
query_tree(int type, const std::string& n) { push_back(type, n); }
template <typename T> void push_back(int type, const T& v)
{
if (type == 0) {
must.push_back(v);
} else if (type == 1) {
must_not.push_back(v);
} else {
should.push_back(v);
}
}
query_tree(query_tree& old, const query_tree& v)
{
must = std::move(old.must);
for (size_t i = 0; i < v.must.size(); i++) {
must.push_back(v.must[i]);
}
must_not = std::move(old.must_not);
for (size_t i = 0; i < v.must_not.size(); i++) {
must_not.push_back(v.must_not[i]);
}
should = std::move(old.should);
for (size_t i = 0; i < v.should.size(); i++) {
should.push_back(v.should[i]);
}
}
};
/// Qi grammar that parses a Lucene-style query string into a query_tree.
///
/// Terms are `name:value` fields, optionally prefixed with `+` (must) or
/// `-` / `NOT` (must_not), grouped with parentheses and combined with
/// `AND` / `&&` and `OR` / `||`. Field names and values are captured as raw
/// source text (via raw[]) and concatenated into a single string per field.
///
/// NOTE(review): the grammar consumes whatever `It` yields element by element.
/// With a std::string iterator that is one byte at a time, so a multi-byte
/// UTF-8 character reaches qi::unicode::char_ as several separate `char`
/// values — confirm how the Boost version in use classifies bytes >= 0x80.
template <typename It, typename Skipper = qi::space_type>
struct parser : qi::grammar<It, query_tree(), Skipper> {
    parser() : parser::base_type(query)
    {
        using namespace qi;

        // Raw text up to the next parenthesis; a quoted string is tried first
        // so it is consumed as one unit. This rule can match the empty string.
        part1 = raw[lexeme[*(str_pattern | qi::unicode::char_ - (char_(')') | char_('(')))]];

        // Everything after an opening '(' up to and including its matching ')':
        // text, then any number of (nested parenthesis, text) pairs, then ')'.
        // This is written as a loop rather than by letting part2 invoke itself,
        // because part1 can succeed without consuming input: a self-recursive
        // alternative would then re-enter part2 forever when the closing ')'
        // is missing (e.g. at end of input). Here a missing ')' raises
        // qi::expectation_failure instead.
        part2 = part1[_val = _1] >
                *(parenthese[_val = _val + _1] > part1[_val = _val + _1]) >
                char_(')')[_val = _val + _1];

        // A balanced parenthesised value, returned verbatim including the parentheses.
        parenthese = char_('(')[_val = _1] > part2[_val = _val + _1];

        // Range value delimited by '[' or '{' and ']' or '}', returned verbatim.
        range = raw[lexeme[sym_open > *(char_ - sym_close) > sym_close]];

        // Field name: one or more characters other than ':', whitespace or ')',
        // followed by ':' (the colon is part of the captured text).
        name = raw[lexeme[+(qi::unicode::char_ - (':' | space | ')')) > ':']];

        // Fallback value: one or more characters up to whitespace or ')'.
        other_value = raw[lexeme[+(qi::unicode::char_ - space - ')')]];

        // Double-quoted value, returned verbatim including the quotes.
        string_value = raw[lexeme[str_pattern]];

        // A field is its name immediately followed by the first value form that matches.
        field =
            name[_val = _1] > (string_value | parenthese | range | other_value)[_val = _val + _1];

        group = '(' > query > ')';

        // A group is passed through unchanged; a bare field is wrapped in a
        // one-leaf query_tree whose list is chosen by the integer type.
        must = "+" > (group[_val = _1] | field[_val = phx::construct<query_tree>(0, _1)]);
        // NOTE(review): a negated field is wrapped with type 0, exactly like
        // `must`; the negation itself is applied by `expr` (type 1) — confirm
        // that this is intended.
        must_not = (string("-") | string("NOT")) >
                   (group[_val = _1] | field[_val = phx::construct<query_tree>(0, _1)]);
        should = group[_val = _1] | field[_val = phx::construct<query_tree>(2, _1)];

        // Nest the parsed term under the list matching its prefix.
        expr = (must[_val = phx::construct<query_tree>(0, _val, _1)] |
                must_not[_val = phx::construct<query_tree>(1, _val, _1)] |
                should[_val = phx::construct<query_tree>(2, _val, _1)]);

        // Operands joined by AND / && are merged using query_tree's type-3 mode.
        And = expr[_val = phx::construct<query_tree>(_val, _1)] >
              *((string("AND") | string("&&")) >
                expr[_val = phx::construct<query_tree>(3, _val, _1)]);

        // Operands joined by OR / || have their lists concatenated.
        Or = And[_val = _1] >
             *((string("OR") | string("||")) > And[_val = phx::construct<query_tree>(_val, _1)]);

        // Zero or more Or-expressions; this rule therefore also matches empty input.
        query = *(Or[_val = phx::construct<query_tree>(_val, _1)]);
    }

private:
    qi::rule<It, std::string(), Skipper> field, name, string_value, other_value;
    qi::rule<It, std::string(), qi::no_skip_type> parenthese, part1, part2, range;
    qi::rule<It, query_tree(), Skipper> must, must_not, should, query, expr, group, And, Or;
};
/// Parses `input` with DB::parser, skipping whitespace between tokens.
///
/// @param input  query text to parse; it is only read.
/// @return       the local `str`, which is never filled in — the parsed
///               query_tree is currently discarded, so the result is always
///               an empty string.
/// @throws std::string  "invalid input: <input>" when the parse fails or stops
///                      before the end of the input, or an
///                      "expectation_failure at '<remaining text>'" message
///                      when the grammar hits a failed expectation point.
std::string parse_from_lucene(std::string& input)
{
    auto f(std::begin(input)), l(std::end(input));
    parser<decltype(f)> p;
    std::string str;
    try {
        query_tree result;
        const bool ok = qi::phrase_parse(f, l, p, qi::space, result);
        // The grammar's start rule is a Kleene star, so it reports success
        // even when it stops early; leftover input (f != l) must be treated
        // as an error explicitly.
        if (!ok || f != l) {
            throw "invalid input: " + input;
        }
    } catch (const qi::expectation_failure<decltype(f)>& e) {
        throw "expectation_failure at '" + std::string(e.first, e.last) + "'\n";
    }
    return str;
}
};
// Parses a sample query and prints the result.
// DB::parse_from_lucene reports errors by throwing a std::string; without a
// handler here any parse error would end in std::terminate, which looks like a
// crash. The message is printed to stderr and a non-zero exit code is returned.
int main()
{
    std::string input = "geo_dip_subdivision:(+国 -民)";
    try {
        input = DB::parse_from_lucene(input);
    } catch (const std::string& message) {
        std::cerr << message << std::endl;
        return 1;
    }
    std::cout << input << std::endl;
    return 0;
}