Desired behaviour:
- Everything after a
'#'
is ignored (#
= comment).- Empty lines don't create tokens.
- '{' creates a token of type
BLOCK_OPEN
.- '}' creates a token of type
BLOCK_CLOSE
.- '=' creates a token of type
EQUALS
.- Everything else creates a token of type
LABEL
.- Tokens must not have empty space(s)
For most inputs, my tokenization function flawlessly. Except one bug:
show_position = { x=-9 y =78 }
Note the lack of spaces!
The vector returned is missing the "="
between the "x"
and the "-9"
.
How do I fix this bug? I tried debugging but couldn't figure out what I messed up. A fresh pair of eyes is a boon.
This is how I tokenize:
std::vector<Token> tokenizeLine(const std::string str)
{
std::vector<Token> tokens;
std::string::size_type start = 0;
std::string::size_type end = 0;
while (end != std::string::npos)
{
enum POSES
{
EQUALS,
OPEN,
CLOSE,
SPACE,
EOL,
RETURN,
TAB,
COMMENT,
POSES_SIZE
};
std::string::size_type pos[] =
{
str.find('=', start),
str.find('{', start),
str.find('}', start),
str.find(' ', start),
str.find('\n', start),
str.find('\r', start),
str.find('\t', start),
str.find('#', start)
};
end = *std::min_element(pos, &pos[POSES_SIZE]);
switch (str[start])
{
case('=') :
tokens.push_back(Token(Token::EQUALS, "="));
break;
case('{') :
tokens.push_back(Token(Token::BLOCK_OPEN, "{"));
break;
case('}') :
tokens.push_back(Token(Token::BLOCK_CLOSE, "}"));
break;
case(' ') :
case('\n') :
case('\r') :
case('\t'):
break;
case('#') :
return tokens;
break;
default:
if(str.substr(start, end - start).length() > 0)
tokens.push_back(Token(Token::LABEL, str.substr(start, end - start)));
}
// If at end, use start=maxSize. Else use start=end+delimiter.
start = ((end > (std::string::npos - sizeof(char)))
? std::string::npos : end + sizeof(char));
}
return tokens;
}
Here's one you can run in the comfort of your home:
std::vector<std::string> tokenizeLine(const std::string str)
{
std::vector<std::string> tokens;
std::string::size_type start = 0;
std::string::size_type end = 0;
while (end != std::string::npos)
{
enum POSES // Deliminators
{
EQUALS,
OPEN,
CLOSE,
SPACE,
EOL,
RETURN,
TAB,
COMMENT,
POSES_SIZE
};
std::string::size_type pos[] =
{
str.find('=', start),
str.find('{', start),
str.find('}', start),
str.find(' ', start),
str.find('\n', start),
str.find('\r', start),
str.find('\t', start),
str.find('#', start)
};
end = *std::min_element(pos, &pos[POSES_SIZE]);
switch (str[start])
{
case('=') :
tokens.push_back("=");
break;
case('{') :
tokens.push_back("{");
break;
case('}') :
tokens.push_back("}");
break;
case(' ') :
case('\n') :
case('\r') :
case('\t'):
break;
case('#') :
return tokens;
break;
default:
if(str.substr(start, end - start).length() > 0)
tokens.push_back(str.substr(start, end - start));
}
// If at end, use start=maxSize. Else use start=end+delimiter.
start = ((end > (std::string::npos - sizeof(char)))
? std::string::npos : end + sizeof(char));
}
return tokens;
}