All I want is a simple tokenizer, and I would like to run functions by calling eval().
So that's what I did for my project.
Here's the result:
>>> tokenizer('func 123 abc')
[('func', 'func'), ('arg', '123'), ('arg', 'abc')]
>>> tokenizer('func 123.5 abc')
[('func', 'func'), ('arg', '123.5'), ('arg', 'abc')]
>>> tokenizer('func 123.5 abc "Hello, World!"')
[('func', 'func'), ('arg', '123.5'), ('arg', 'abc'), ('arg', 'Hello, World!')]
>>> tokenizer("func 123.5 abc 'Hello, World!'")
[('func', 'func'), ('arg', '123.5'), ('arg', 'abc'), ('arg', 'Hello, World!')]
Attention: This may not be suitable for everyone; it is not a complete parser or tokenizer.
Code:
def isNumber(cmd):
    """Return True if *cmd* is accepted by ``int()`` or ``float()``.

    Each conversion is attempted in turn; a ``ValueError`` means "this
    converter rejects it". Any other exception raised by a converter is
    not caught here.
    """
    for convert in (int, float):
        try:
            convert(cmd)
        except ValueError:
            # Not parseable by this converter; try the next one.
            continue
        return True
    return False
def isWord(cmd):
    """Return True if *cmd* looks like a bare word.

    A word is non-empty, starts with an alphabetic character, and every
    following character is alphabetic, ``'_'`` or ``'-'``. Digits are not
    allowed anywhere in a word.
    """
    if not cmd:
        return False
    if not cmd[0].isalpha():
        return False
    return all(ch.isalpha() or ch in ('_', '-') for ch in cmd[1:])
def spaceParser(cmd):
    """Return *cmd* with all leading space characters removed.

    Only the space character ``' '`` is stripped; tabs, newlines and any
    trailing spaces are left untouched. An input consisting solely of
    spaces yields the empty string.
    """
    # str.lstrip handles the all-spaces case correctly; the previous index
    # loop stopped on the last character and returned a single space.
    return cmd.lstrip(' ')
def funcNameParser(cmd):
    """Split the leading function name off *cmd*.

    Leading spaces are removed with ``spaceParser`` first. The name is
    everything up to (not including) the first space that follows.

    Returns:
        ``(name, rest)``. ``rest`` begins at the space that terminated the
        name (the separator is kept), or is ``''`` when no space follows.
    """
    stripped = spaceParser(cmd)
    cut = stripped.find(' ')
    if cut < 0:
        # No separator: the whole input is the name.
        return (stripped, '')
    return (stripped[:cut], stripped[cut:])
def argumentParser(cmd):
    """Parse a single argument from the front of *cmd*.

    Leading spaces are removed with ``spaceParser``. The argument is then
    either

    * a string enclosed in a matching pair of single or double quotes
      (returned without the quotes; no escape sequences are recognised), or
    * a bare token running up to the next space, which must satisfy
      ``isWord`` or ``isNumber``.

    Returns:
        ``(word, rest)``. After a quoted string ``rest`` starts immediately
        after the closing quote; after a bare token it starts at the space
        that ended the token, or is ``''`` when nothing follows.

    Raises:
        AssertionError: if a quoted string is not terminated, or if the
            bare token (including an empty one) is neither a valid word
            nor a number.
    """
    cmd = spaceParser(cmd)

    # Slice rather than index so that empty input falls through to the
    # "not a valid name" error below instead of raising IndexError.
    opener = cmd[:1]
    if opener in ('\'', '"'):
        closing = cmd.find(opener, 1)
        if closing == -1:
            # Raised explicitly: an ``assert`` statement is stripped under
            # ``python -O``, which would let malformed input through.
            raise AssertionError('Fatal exception: String not finished.')
        return (cmd[1:closing], cmd[closing + 1:])

    end = cmd.find(' ')
    if end == -1:
        end = len(cmd)
    word = cmd[:end]
    if not (isWord(word) or isNumber(word)):
        raise AssertionError('Fatal exception: Not a valid name.')
    # Keep the separator space in the remainder, as callers expect.
    return (word, cmd[end:])
def tokenizer(cmd):
    """Split a command line into a function name and its arguments.

    The first space-delimited word is taken by ``funcNameParser`` and tagged
    ``'func'``; every following argument is taken by ``argumentParser`` and
    tagged ``'arg'``. All values are returned as strings.

    >>> tokenizer('func 123.5 abc "Hello, World!"')
    [('func', 'func'), ('arg', '123.5'), ('arg', 'abc'), ('arg', 'Hello, World!')]

    Returns:
        A list of ``(kind, text)`` tuples, starting with the ``'func'``
        entry.

    Raises:
        AssertionError: propagated from ``argumentParser`` for an
            unterminated string or an invalid bare token.
    """
    name, rest = funcNameParser(cmd)
    tokens = [('func', name)]

    # Drop separator spaces before testing for "anything left?" so that
    # trailing whitespace is not handed to argumentParser as an argument.
    rest = rest.lstrip(' ')
    while rest:
        arg, rest = argumentParser(rest)
        tokens.append(('arg', arg))
        rest = rest.lstrip(' ')
    return tokens