52 lines
1.1 KiB
Python
52 lines
1.1 KiB
Python
#!/usr/bin/env python
|
|
import string
|
|
|
|
# Token kind tags; each token produced by the lexer is a (kind, value) tuple.
TOKEN_ID = "IDENT"    # tag used for identifier tokens
TOKEN_NUM = "NUMBER"  # tag used for integer tokens
|
|
|
|
def lex(s):
    """Split *s* into identifier and number tokens with a table-driven state machine.

    The input is consumed one character at a time. A run that starts with an
    ASCII letter and continues with ASCII letters or digits becomes
    ``(TOKEN_ID, text)``; a run of ASCII digits becomes
    ``(TOKEN_NUM, int(text))``; runs of whitespace are consumed and produce
    no token. Each run is extended as far as the transition table allows
    before it is emitted.

    Args:
        s: The text to tokenize (any iterable of single-character strings).

    Yields:
        ``(TOKEN_ID, str)`` and ``(TOKEN_NUM, int)`` tuples in input order.

    Raises:
        ValueError: If a character is encountered that cannot start any
            token. The message names the offending character.
    """
    end = object()  # unique sentinel returned by next() once the input is exhausted

    # transitions[state] maps a character class (a string of accepted
    # characters) to the state entered after consuming such a character.
    # State 0 is the start state, 1 collects an identifier, 2 collects
    # whitespace and 3 collects a number.
    transitions = [
        {string.ascii_letters: 1, string.whitespace: 2, string.digits: 3},
        {string.ascii_letters + string.digits: 1},
        {string.whitespace: 2},
        {string.digits: 3},
    ]
    # finish[state] converts the collected lexeme into a token when the run
    # ends. None (start state) means nothing can be emitted there; a callable
    # returning None means the lexeme is dropped (whitespace).
    finish = [
        None,
        lambda text: (TOKEN_ID, text),
        lambda text: None,
        lambda text: (TOKEN_NUM, int(text)),
    ]

    state = 0
    chars = iter(s)
    c = next(chars, end)
    lexeme = []
    while c is not end:
        # Look for a transition that accepts the current character.
        target = None
        for char_class, candidate in transitions[state].items():
            if c in char_class:
                target = candidate
                break

        if target is not None:
            lexeme.append(c)
            state = target
            c = next(chars, end)
            if c is not end:
                continue  # more input: try to extend the current lexeme
            # Input exhausted: fall through and flush the final lexeme.

        make_token = finish[state]
        if make_token is None:
            # Only reachable in the start state with an unconsumed character,
            # so report that character rather than the (empty) lexeme.
            raise ValueError("Unknown character %s" % c)
        token = make_token("".join(lexeme))
        if token is not None:
            yield token
        state = 0
        lexeme = []
|
|
|
|
|
|
def main():
    """Tokenize a fixed sample sentence and print every token on its own line."""
    sample = "Hallo 2 Welt"
    tokens = lex(sample)
    for tok in tokens:
        print(tok)
|
|
|
|
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|