I'm playing with an implementation of a generic `Tokenizer` class template wrapped inside a `tok` namespace.
/// Tokenizer class template, parameterised on the enumeration type used to
/// label tokens. Only the pieces relevant to the discussion are shown; the
/// `// ...` markers stand for elided members.
template <typename TokenEnum>
class Tokenizer {
public:
    /// Nested token record (members elided in this excerpt).
    struct Token {
        // ...
    };

    /// Nested context type that matcher callbacks receive by reference
    /// (members elided in this excerpt).
    class Context {
        // ...
    };

    /// Signature every matcher callback has to satisfy: it takes a Context by
    /// non-const reference and returns a std::size_t. This alias is the type
    /// the later snippets refer to.
    using MatchFunc = std::function<std::size_t(Context &)>;

    // ...

private:
    // ...
};
At the moment, I can declare methods and member variables for the `Tokenizer` class, and define those methods outside the class body, without any problem.
// Example method of Tokenizer, defined outside the class template body.
// The `template <typename TokenEnum>` header is repeated and the member name is
// qualified as Tokenizer<TokenEnum>::addMatchFunc. The parameter type `MatchFunc`
// needs neither qualification nor `typename`: it appears after the
// `Tokenizer<TokenEnum>::` qualifier, so it is looked up in the class scope.
template <typename TokenEnum>
void Tokenizer<TokenEnum>::addMatchFunc(MatchFunc func, TokenEnum type)
{
// ... (body elided in this excerpt)
}
Sometimes, though, I have had to add the `typename` keyword in front of some types in certain `Tokenizer` method definitions, for reasons I have only half understood from StackOverflow.
// Does not compile unless I specify typename near "std::vector<typename Tokenizer<TokenEnum>::Token>>"
// Why: the return type is written before the `Tokenizer<TokenEnum>::` qualifier of
// the function name, so it is not yet in class scope and `Token` must be fully
// qualified. `Tokenizer<TokenEnum>::Token` depends on the template parameter
// TokenEnum, and a dependent qualified name is only parsed as a type when it is
// prefixed with `typename`.
template <typename TokenEnum>
std::unique_ptr<std::vector<typename Tokenizer<TokenEnum>::Token>> Tokenizer<TokenEnum>::tokenize(const std::string &rawString)
{
// ... (body elided in this excerpt)
}
I can also declare methods and member variables for the Tokenizer::Context
nested class just fine.
// Example method of Tokenizer::Context, the class nested inside the Tokenizer
// template, defined outside the class body. Only the outer template header is
// needed; the nested class is reached through `Tokenizer<TokenEnum>::Context::`.
// This is the `const std::string &` overload of match and is a const member.
template <typename TokenEnum>
bool Tokenizer<TokenEnum>::Context::match(const std::string &comp) const
{
// ... (body elided in this excerpt)
}
Following that, I have defined a `Matchers` class template outside the `Tokenizer` class. It contains a bunch of static functions whose signatures conform to `Tokenizer::MatchFunc`.
Here is the definition for the Matchers
class.
#include "Tokenizer.hpp"
// Holder for static matcher functions, each taking a Tokenizer<TokenEnum>::Context
// by reference and returning std::size_t (the shape of Tokenizer::MatchFunc).
// NOTE(review): `Tokenizer<TokenEnum>::Context` is a dependent qualified name (it
// depends on TokenEnum). Without a leading `typename` the compiler does not treat
// it as a type in these parameter lists, which is what the "is not a type"
// diagnostic reports; every parameter would need to be spelled
// `typename Tokenizer<TokenEnum>::Context &ctx`.
template <typename TokenEnum>
class Matchers {
public:
static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
// Member template parameterised on a single character value.
template <char charValue>
static std::size_t character(Tokenizer<TokenEnum>::Context &ctx);
// Member template parameterised on a pointer-to-char non-type argument.
template <char *stringValue>
static std::size_t string(Tokenizer<TokenEnum>::Context &ctx);
static std::size_t word(Tokenizer<TokenEnum>::Context &ctx);
static std::size_t integer(Tokenizer<TokenEnum>::Context &ctx);
private:
};
This `Matchers` class fails to compile. For each method of `Matchers`, the compiler reports the following errors (shown here for `whitespace`).
In file included from tests/tok/Matchers.cpp:11:
deps/tok/Matchers.hpp:19:35: error: ‘Tokenizer<TokenEnum>::Context’ is not a type
19 | static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
|
In file included from deps/tok/Matchers.hpp:33,
from tests/tok/Matchers.cpp:11:
deps/tok/Matchers.ipp:13:13: error: ‘std::size_t tok::Matchers<TokenEnum>::whitespace’ is not a static data member of ‘class tok::Matchers<TokenEnum>’
13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
| ^~~~~~~~~~~~~~~~~~~
deps/tok/Matchers.ipp:13:67: error: template definition of non-template ‘std::size_t tok::Matchers<TokenEnum>::whitespace’
13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
|
deps/tok/Matchers.ipp:13:76: error: ‘ctx’ was not declared in this scope
13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
|
For reference, here is the full list of header files used.
Tokenizer.hpp
#pragma once
#include <string>
#include <vector>
#include <memory>
#include <functional>
#include <utility>
namespace tok {
/// Generic tokenizer, parameterised on the enumeration type used to label tokens.
///
/// Matchers are registered as (MatchFunc, TokenEnum) pairs and `tokenize`
/// returns the tokens found in a string. The member functions are only declared
/// here; their definitions are expected in the accompanying .ipp files.
template <typename TokenEnum>
class Tokenizer {
public:
    /// One token: its type plus a start offset and a length.
    /// NOTE(review): start/length are presumably offsets into the tokenized
    /// string -- confirm against Token.ipp / Tokenizer.ipp.
    struct Token {
        Token(TokenEnum type, std::size_t start, std::size_t length);
        TokenEnum type;
        std::size_t start;
        std::size_t length;
    };

    /// State handed to matcher callbacks.
    ///
    /// The token list, the raw string and the position are held as reference
    /// members, so the objects passed to the constructor must outlive the
    /// Context.
    /// NOTE(review): the exact semantics of the match*/consume* helpers live in
    /// Context.ipp and are not visible here; the one-line comments below are
    /// inferred from the names and should be confirmed.
    class Context {
    public:
        Context(const std::vector<Token> &tokens, const std::string &rawString, const std::size_t &pos);
        bool isAtEnd() const;
        // Non-mutating checks (const members).
        bool match(char character) const;
        bool match(const std::string &comp) const;
        bool matchDigit() const;
        bool matchAlpha() const;
        // Mutating counterparts (non-const members).
        bool consume(char character);
        bool consume(const std::string &comp);
        bool consume(std::size_t chars = 1);
        bool consumeDigit();
        bool consumeAlpha();
        void reset();
        char currentChar() const;
        std::size_t result() const;
    private:
        const std::vector<Token> &tokens;
        const std::string &rawString;
        const std::size_t length;
        const std::size_t &pos;
        std::size_t index;
    };

    /// Signature of a matcher: receives the Context by reference and returns a
    /// std::size_t.
    using MatchFunc = std::function<std::size_t(Context &ctx)>;

    void setUndefinedToken(TokenEnum type);
    void setWhitespaceMatchFunc(MatchFunc func);
    void addMatchFunc(MatchFunc func, TokenEnum type);
    std::unique_ptr<std::vector<Token>> tokenize(const std::string &rawString);
protected:
private:
    std::vector<std::pair<MatchFunc, TokenEnum>> matchers;
    MatchFunc whitespaceMatcher;
    // Value-initialised so it never holds an indeterminate value when
    // setUndefinedToken() has not been called (for an enum this is the
    // zero-valued enumerator), mirroring the initialiser on handlesWhitespace.
    TokenEnum undefinedToken{};
    bool handlesWhitespace{false};
};
#include "Tokenizer.ipp"
#include "Context.ipp"
#include "Token.ipp"
}
Matchers.hpp
#pragma once
#include <cstdint>
#include "Tokenizer.hpp"
namespace tok {
template <typename TokenEnum>
class Matchers {
public:
static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
template <char charValue>
static std::size_t character(Tokenizer<TokenEnum>::Context &ctx);
template <char *stringValue>
static std::size_t string(Tokenizer<TokenEnum>::Context &ctx);
static std::size_t word(Tokenizer<TokenEnum>::Context &ctx);
static std::size_t integer(Tokenizer<TokenEnum>::Context &ctx);
protected:
private:
};
#include "Matchers.ipp"
}
Notes
Before implementing the `Matchers` class, the following compiled and ran fine.
// Scoped enumeration used as the TokenEnum argument of tok::Tokenizer in the
// usage example; each enumerator labels one kind of token.
enum class GolangToken {
OpenBrace,
ClosedBrace,
Integer,
For,
While,
// ... (further enumerators elided in this excerpt)
};
// Usage example: instantiate the tokenizer for GolangToken, register one matcher
// per token type, then tokenize a sample string.
// The match* callables (matchOpenBrace, matchClosedBrace, ...) are defined
// elsewhere and not shown; they must be convertible to Tokenizer::MatchFunc.
tok::Tokenizer<GolangToken> tokenizer;
tokenizer.addMatchFunc(matchOpenBrace, GolangToken::OpenBrace);
tokenizer.addMatchFunc(matchClosedBrace, GolangToken::ClosedBrace);
tokenizer.addMatchFunc(matchInteger, GolangToken::Integer);
tokenizer.addMatchFunc(matchWhile, GolangToken::While);
tokenizer.addMatchFunc(matchFor, GolangToken::For);
// `tokens` is the std::unique_ptr<std::vector<Token>> returned by tokenize().
auto tokens = tokenizer.tokenize("{}1293 for while");