When tokenizing with Superpower, how can I match a string only if it is the first thing on a line? (Note: this is a different question from this one.)
For example, assume I have a language with only the following 4 characters (' ', ':', 'X', 'Y'), each of which is a token. There is also a 'Header' token that should capture matches of the regex pattern /^[XY]+:/ (one or more Xs and Ys followed by a colon, but only when they start the line).
Here is a quick class for testing (the 4th test-case fails):
using System;
using Superpower;
using Superpower.Parsers;
using Superpower.Tokenizers;
/// <summary>
/// Token kinds of the toy language: one kind per literal character, plus
/// <see cref="Header"/> for a run of X/Y characters followed by a colon at the
/// start of a line.
/// </summary>
public enum Tokens
{
    /// <summary>A single space character.</summary>
    Space,

    /// <summary>A single ':' character.</summary>
    Colon,

    /// <summary>A line-leading run of X/Y characters terminated by ':'.</summary>
    Header,

    /// <summary>A single 'X' character.</summary>
    X,

    /// <summary>A single 'Y' character.</summary>
    Y,
}
/// <summary>
/// Small console harness that tokenizes sample inputs of the X/Y toy language
/// and prints whether the produced token kinds match the expected sequence.
/// </summary>
public class XYTokenizer
{
    private static void Main(string[] args)
    {
        Test("X", Tokens.X);
        Test("XY", Tokens.X, Tokens.Y);
        Test("X Y:", Tokens.X, Tokens.Space, Tokens.Y, Tokens.Colon);

        // This case reports a mismatch: the tokenizer below registers no rule
        // that produces Tokens.Header, so the leading 'X' is emitted as Tokens.X.
        Test("X: X", Tokens.Header, Tokens.Space, Tokens.X);
    }

    /// <summary>
    /// Tokenizer with one single-character rule for each of 'X', 'Y', ':' and ' '.
    /// </summary>
    public static readonly Tokenizer<Tokens> tokenizer = new TokenizerBuilder<Tokens>()
        .Match(Character.EqualTo('X'), Tokens.X)
        .Match(Character.EqualTo('Y'), Tokens.Y)
        .Match(Character.EqualTo(':'), Tokens.Colon)
        .Match(Character.EqualTo(' '), Tokens.Space)
        .Build();

    /// <summary>
    /// Tokenizes <paramref name="input"/> and compares the resulting token kinds,
    /// in order, against <paramref name="expected"/>. Prints "OK" only when both
    /// sequences have the same length and every kind matches; otherwise prints a
    /// description of the first discrepancy.
    /// </summary>
    /// <param name="input">Text to tokenize.</param>
    /// <param name="expected">Token kinds the tokenizer should produce, in order.</param>
    private static void Test(string input, params Tokens[] expected)
    {
        var tokens = tokenizer.Tokenize(input);
        var i = 0;
        foreach (var t in tokens)
        {
            // Guard the index: more tokens than expectations is a failure,
            // not an IndexOutOfRangeException.
            if (i >= expected.Length)
            {
                Console.WriteLine($"tokens[{i}] was Tokens.{t.Kind} but only {expected.Length} token(s) were expected for '{input}'");
                return;
            }

            if (t.Kind != expected[i])
            {
                Console.WriteLine($"tokens[{i}] was Tokens.{t.Kind} not Tokens.{expected[i]} for '{input}'");
                return;
            }

            i++;
        }

        // Fewer tokens than expectations must not be reported as success.
        if (i < expected.Length)
        {
            Console.WriteLine($"tokens[{i}] was missing, expected Tokens.{expected[i]} for '{input}'");
            return;
        }

        Console.WriteLine("OK");
    }
}