Lexer fsm style
This commit is contained in:
parent
0f9d904c71
commit
f72c297c95
1 changed files with 52 additions and 0 deletions
52
lexer-fsm.py
Normal file
52
lexer-fsm.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env python
|
||||
import string
|
||||
|
||||
# Token type tags; lex() uses them as the first element of each token tuple.
TOKEN_ID = "IDENT"
TOKEN_NUM = "NUMBER"
|
||||
|
||||
def lex(s):
    """Split *s* into tokens using a table-driven finite-state machine.

    States: 0 = start, 1 = identifier, 2 = whitespace, 3 = number.

    Args:
        s: Iterable of single characters (typically a ``str``).

    Yields:
        ``(TOKEN_ID, text)`` for a letter followed by letters/digits, and
        ``(TOKEN_NUM, value)`` (``value`` converted with ``int``) for a run
        of digits.  Whitespace runs produce no token.

    Raises:
        ValueError: If a character has no transition from the start state.
    """
    # Unique sentinel so that exhausted input can never be confused with
    # an actual input character.
    end = object()

    # table[state] maps a set of accepted characters to the next state.
    table = [
        {string.ascii_letters: 1, string.whitespace: 2, string.digits: 3},
        {string.ascii_letters + string.digits: 1},
        {string.whitespace: 2},
        {string.digits: 3},
    ]
    # finish[state] turns the collected lexeme into a token (or None to
    # drop it).  State 0 has no finisher: nothing can end there.
    finish = [
        None,
        lambda text: (TOKEN_ID, text),
        lambda text: None,
        lambda text: (TOKEN_NUM, int(text)),
    ]

    state = 0
    it = iter(s)
    c = next(it, end)
    lexeme = []
    while c is not end:
        advanced = False
        for charset, target in table[state].items():
            if c in charset:
                lexeme.append(c)
                state = target
                c = next(it, end)
                advanced = True
                break
        # Keep consuming while the current token can still grow; fall
        # through to emit it once input ends or no transition matches.
        if advanced and c is not end:
            continue

        finisher = finish[state]
        if finisher is None:
            # Only reachable in the start state with an unmatched, real
            # character, so report that character (the lexeme is empty).
            raise ValueError("Unknown character %r" % (c,))
        token = finisher("".join(lexeme))
        if token is not None:
            yield token
        state = 0
        lexeme = []
|
||||
|
||||
|
||||
def main():
    """Lex a fixed sample sentence and print every token on its own line."""
    sample = "Hallo 2 Welt"
    for tok in lex(sample):
        print(tok)
|
||||
|
||||
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue