0

I'm playing with an implementation of a generic Tokenizer class wrapped inside a tok namespace.

// Abridged view of the Tokenizer class template; TokenEnum is the
// user-supplied token type. Elided members are marked with "// ...".
template <typename TokenEnum>
class Tokenizer {
public:
    // Nested token record (members elided in this excerpt).
    struct Token {
        // ...
    };

    // Nested class passed by reference to every match function
    // (members elided in this excerpt).
    class Context {
        // ...
    };

    // Important for later.
    // Callable signature for match functions: takes a Context by non-const
    // reference and returns a std::size_t.
    using MatchFunc = std::function<std::size_t(Context &ctx)>;

    // ...

private:
    // ...

};

At the moment, I can declare methods and member variables for the Tokenizer class alright.

// Example method of Tokenizer
// Out-of-class definition of a member of the class template. `MatchFunc` can
// be written unqualified because parameter types appear after the
// `Tokenizer<TokenEnum>::` qualifier and are therefore looked up in the scope
// of the class.
template <typename TokenEnum>
void Tokenizer<TokenEnum>::addMatchFunc(MatchFunc func, TokenEnum type)
{
    // ...
}

Sometimes, though, I have had to add the typename keyword in front of certain types in the definitions of Tokenizer's methods, for reasons I only half understand from StackOverflow.

// Does not compile unless `typename` precedes `Tokenizer<TokenEnum>::Token`
// in the return type.
// Why: a leading return type is written before the `Tokenizer<TokenEnum>::`
// qualifier, so it is not looked up in class scope and `Token` must be fully
// qualified. `Tokenizer<TokenEnum>::Token` is then a dependent name, which is
// only parsed as a type when it is prefixed with `typename`.
template <typename TokenEnum>
std::unique_ptr<std::vector<typename Tokenizer<TokenEnum>::Token>> Tokenizer<TokenEnum>::tokenize(const std::string &rawString)
{
    // ...
}

I can also declare methods and member variables for the Tokenizer::Context nested class just fine.

// Example method of Tokenizer::Context
// Out-of-class definition of a const member function of the nested Context
// class. No `typename` is needed: `Tokenizer<TokenEnum>::Context::` is used
// here as a nested-name-specifier, not as a type, and the return and
// parameter types are non-dependent.
template <typename TokenEnum>
bool Tokenizer<TokenEnum>::Context::match(const std::string &comp) const
{
    // ...
}

Following that, I have defined a Matchers class outside the Tokenizer class. It contains a bunch of static functions which conform to the Tokenizer::MatchFunc signature.

Here is the definition for the Matchers class.

#include "Tokenizer.hpp"

template <typename TokenEnum>
class Matchers {
public:
    static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
    template <char charValue>
    static std::size_t character(Tokenizer<TokenEnum>::Context &ctx);
    template <char *stringValue>
    static std::size_t string(Tokenizer<TokenEnum>::Context &ctx);
    static std::size_t word(Tokenizer<TokenEnum>::Context &ctx);
    static std::size_t integer(Tokenizer<TokenEnum>::Context &ctx);

private:

};

This Matchers class fails to compile. For each method of Matchers, it throws the following errors.

In file included from tests/tok/Matchers.cpp:11:
deps/tok/Matchers.hpp:19:35: error: ‘Tokenizer<TokenEnum>::Context’ is not a type
   19 |     static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
      | 
In file included from deps/tok/Matchers.hpp:33,
                 from tests/tok/Matchers.cpp:11:
deps/tok/Matchers.ipp:13:13: error: ‘std::size_t tok::Matchers<TokenEnum>::whitespace’ is not a static data member of ‘class tok::Matchers<TokenEnum>’
   13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
      |             ^~~~~~~~~~~~~~~~~~~
deps/tok/Matchers.ipp:13:67: error: template definition of non-template ‘std::size_t tok::Matchers<TokenEnum>::whitespace’
   13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
      | 
deps/tok/Matchers.ipp:13:76: error: ‘ctx’ was not declared in this scope
   13 | std::size_t Matchers<TokenEnum>::whitespace(Tokenizer<TokenEnum>::Context &ctx)
      |   

For reference, here is the full list of header files used.

Tokenizer.hpp

#pragma once

#include <string>
#include <vector>
#include <memory>
#include <functional>
#include <utility>

namespace tok {

/// Generic tokenizer parameterized on the caller's token type.
///
/// @tparam TokenEnum Type used to tag tokens (e.g. an `enum class`).
template <typename TokenEnum>
class Tokenizer {
public:
    /// Plain record describing one token: its type plus a start/length pair.
    /// NOTE(review): start/length are presumably offsets into the tokenized
    /// string - confirm against Token.ipp / Tokenizer.ipp.
    struct Token {
        Token(TokenEnum type, std::size_t start, std::size_t length);

        TokenEnum type;
        std::size_t start;
        std::size_t length;
    };

    /// Object handed to every match function.
    ///
    /// It stores *references* to the token list, the raw string and the
    /// position (see the private members), so it must not outlive the
    /// objects those references are bound to.
    class Context {
    public:
        Context(const std::vector<Token> &tokens, const std::string &rawString, const std::size_t &pos);

        // Non-mutating queries (const member functions).
        bool isAtEnd() const;
        bool match(char character) const;
        bool match(const std::string &comp) const;
        bool matchDigit() const;
        bool matchAlpha() const;

        // Mutating counterparts of the queries above (non-const).
        bool consume(char character);
        bool consume(const std::string &comp);
        bool consume(std::size_t chars = 1);
        bool consumeDigit();
        bool consumeAlpha();

        void reset();
        char currentChar() const;
        std::size_t result() const;

    private:
        const std::vector<Token> &tokens;
        const std::string &rawString;
        const std::size_t length;
        const std::size_t &pos;
        std::size_t index;
    };

    /// Signature of a match function: receives the Context by non-const
    /// reference and returns a std::size_t.
    /// NOTE(review): the return value is presumably the number of characters
    /// matched - confirm against Tokenizer.ipp.
    using MatchFunc = std::function<std::size_t(Context &ctx)>;

    void setUndefinedToken(TokenEnum type);
    void setWhitespaceMatchFunc(MatchFunc func);
    void addMatchFunc(MatchFunc func, TokenEnum type);
    std::unique_ptr<std::vector<Token>> tokenize(const std::string &rawString);

protected:

private:
    // Registered match functions, each paired with its token type.
    std::vector<std::pair<MatchFunc, TokenEnum>> matchers;
    MatchFunc whitespaceMatcher;
    // Value-initialized so it never holds an indeterminate value when
    // setUndefinedToken() has not been called.
    TokenEnum undefinedToken{};
    bool handlesWhitespace{false};

};

#include "Tokenizer.ipp"
#include "Context.ipp"
#include "Token.ipp"

}

Matchers.hpp

#pragma once

#include <cstdint>
#include "Tokenizer.hpp"

namespace tok {

template <typename TokenEnum>
class Matchers {
public:
    static std::size_t whitespace(Tokenizer<TokenEnum>::Context &ctx);
    template <char charValue>
    static std::size_t character(Tokenizer<TokenEnum>::Context &ctx);
    template <char *stringValue>
    static std::size_t string(Tokenizer<TokenEnum>::Context &ctx);
    static std::size_t word(Tokenizer<TokenEnum>::Context &ctx);
    static std::size_t integer(Tokenizer<TokenEnum>::Context &ctx);

protected:

private:

};

#include "Matchers.ipp"

}

Notes

Before I implemented the Matchers class, the following compiled and ran fine.

// Example token enumeration, used below as the TokenEnum template argument
// of tok::Tokenizer. Further enumerators are elided ("// ...").
enum class GolangToken {
    OpenBrace,
    ClosedBrace,
    Integer,
    For,
    While,
    // ...
};

// Usage example: instantiate the tokenizer for GolangToken and register one
// match function per token type. matchOpenBrace, matchClosedBrace,
// matchInteger, matchWhile and matchFor are defined elsewhere (not shown).
tok::Tokenizer<GolangToken> tokenizer;
tokenizer.addMatchFunc(matchOpenBrace, GolangToken::OpenBrace);
tokenizer.addMatchFunc(matchClosedBrace, GolangToken::ClosedBrace);
tokenizer.addMatchFunc(matchInteger, GolangToken::Integer);
tokenizer.addMatchFunc(matchWhile, GolangToken::While);
tokenizer.addMatchFunc(matchFor, GolangToken::For);
// tokenize() is declared to return std::unique_ptr<std::vector<Token>>.
auto tokens = tokenizer.tokenize("{}1293 for while");
Diego ROJAS
  • 509
  • 1
  • 7
  • 14

0 Answers0