I am parsing a custom metadata file in which the separator is a space (' '). The file also contains quoted strings, and a space inside a quoted string must not be treated as a separator. So "\"This Space\"" is one token, whereas "This Space" should be two tokens.
There is a question like this here, with an answer showing how to get the result without using boost::tokenizer. However, this seems like a standard task for a tokenizer, and I assume it should be possible using boost::tokenizer.
I wrote an example to show what I have done so far:
#include <boost/tokenizer.hpp>
#include <vector>
#include <string>
#include <iostream>
using std::string;
using data = std::vector<string>;
/// Returns the reference token list for the sample line used in main():
/// the quoted phrase "is one of" is expected to stay a single token.
data buildExpected()
{
    data expected;
    expected.reserve(4);
    expected.push_back("This");
    expected.push_back("is one of");
    expected.push_back("42");
    expected.push_back("lines");
    return expected;
}
data tokenizeLine(string line)
{
using namespace boost;
data d;
char_separator<char> sep("\" ");
tokenizer<char_separator<char>> tokens(line, sep);
for (string tok : tokens) d.push_back(tok);
return d;
}
/// Prints the tokens of `d` on one line of standard output in the form
/// "(id):tok1; tok2; ...", followed by a newline and a flush.
///
/// @param id  Label written between the parentheses.
/// @param d   Tokens to print; they are joined with "; ".
void logData(string id, data &d)
{
    string out = "(" + id + "):";
    for (data::size_type i = 0; i < d.size(); ++i)
    {
        // The delimiter goes in front of every token except the first one.
        if (i != 0)
        {
            out += "; ";
        }
        out += d[i];
    }
    std::cout << out << std::endl;
}
/// Demo entry point: tokenizes a sample line and prints the expected and the
/// actual token lists so they can be compared by eye.
///
/// Standard C++ requires main to return int; `void main()` is rejected by
/// conforming compilers.
int main()
{
    const string line = "This \"is one of\" 42 lines";
    data expected = buildExpected();
    data actual = tokenizeLine(line);
    // The trailing space in "actual " lines the two outputs up in the console.
    logData("expected", expected);
    logData("actual ", actual);
    return 0;
}
This is the output on my system: