Lexer fsm style
This commit is contained in:
parent
0f9d904c71
commit
f72c297c95
1 changed files with 52 additions and 0 deletions
52
lexer-fsm.py
Normal file
52
lexer-fsm.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
import string
|
||||||
|
|
||||||
|
# Token type tags; every token produced by lex() is a (tag, value) pair.
TOKEN_ID = "IDENT"
TOKEN_NUM = "NUMBER"

# Unique marker for "input exhausted"; compared by identity so it can never
# be confused with a real input character.
_END = object()

# State the machine starts in and returns to after each completed lexeme.
_START = 0

# Transition table, indexed by state.  Each state lists (charset, target)
# pairs: when the current character is in `charset`, it is consumed and the
# machine moves to `target`.
_TRANSITIONS = (
    # 0: start -- nothing consumed yet
    (
        (string.ascii_letters, 1),
        (string.whitespace, 2),
        (string.digits, 3),
    ),
    # 1: inside an identifier (a letter followed by letters/digits)
    ((string.ascii_letters + string.digits, 1),),
    # 2: inside a run of whitespace
    ((string.whitespace, 2),),
    # 3: inside a number (digits only)
    ((string.digits, 3),),
)

# Per-state finisher: turns the consumed text into a token, or returns None
# when the text should be dropped.  A state whose entry is None (not
# callable) cannot end a lexeme at all.
_FINISHERS = (
    None,
    lambda text: (TOKEN_ID, text),
    lambda text: None,  # whitespace is skipped
    lambda text: (TOKEN_NUM, int(text)),
)


def _next_state(state, char):
    """Return the state reached from `state` on `char`, or None if stuck."""
    for charset, target in _TRANSITIONS[state]:
        if char in charset:
            return target
    return None


def lex(s):
    """Split `s` into tokens using a small table-driven state machine.

    This is a generator, so input is consumed lazily and errors surface
    while iterating, not when lex() is called.

    Args:
        s: Iterable of characters to tokenize, typically a str.

    Yields:
        (TOKEN_ID, text) for a letter followed by letters/digits, and
        (TOKEN_NUM, value) with `value` an int for a run of digits.
        Whitespace separates tokens and is not yielded.

    Raises:
        ValueError: When a character cannot start any token.  The message
            names the offending character.
    """
    chars = iter(s)
    current = next(chars, _END)
    state = _START
    lexeme = []

    while current is not _END:
        target = _next_state(state, current)
        if target is not None:
            # Consume the character and keep extending the current lexeme
            # as long as there is more input.
            lexeme.append(current)
            current = next(chars, _END)
            state = target
            if current is not _END:
                continue

        # Either the machine is stuck on `current` or the input just ended:
        # close the lexeme collected so far.
        finisher = _FINISHERS[state]
        if finisher is None:
            # Only the start state has no finisher, so nothing has been
            # consumed and `current` is the character nothing accepts.
            raise ValueError("Unknown character %r" % (current,))

        token = finisher("".join(lexeme))
        if token is not None:
            yield token

        # `current` was not consumed above; retry it from the start state.
        state = _START
        lexeme = []
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Tokenize a fixed demo sentence and print one token per line."""
    sample = "Hallo 2 Welt"
    tokens = lex(sample)
    for tok in tokens:
        print(tok)
|
||||||
|
|
||||||
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in a new issue