78 lines
2.1 KiB
Python
78 lines
2.1 KiB
Python
#!/usr/bin/env python
|
|
from sys import argv
|
|
from random import randint, choice
|
|
|
|
class MarkovGenerator:
    """First-order, word-level Markov chain text generator.

    ``distribution`` maps each word to a dict of follower counts. Every inner
    dict also holds the ``MarkovGenerator.overall`` sentinel key, whose value
    is the total number of followers recorded for that word.
    """

    # Sentinel key for the per-word total. A unique object can never collide
    # with a real token, because tokens are always strings.
    overall = object()

    def __init__(self):
        """Create a generator with no recorded transitions."""
        self.distribution = {}

    def add(self, first, second):
        """Record one observation of ``second`` directly following ``first``."""
        followers = self.distribution.get(first)
        if followers is None:
            followers = {MarkovGenerator.overall: 0}
            self.distribution[first] = followers

        followers[MarkovGenerator.overall] += 1
        followers[second] = followers.get(second, 0) + 1

    def getrandomfollower(self, word):
        """Return a random follower of ``word``, weighted by observed count.

        Returns ``None`` when ``word`` has no recorded followers, which
        happens for unknown words and for a token that only ever appeared at
        the very end of the scanned text.
        """
        followers = self.distribution.get(word)
        if not followers:
            return None

        total = followers[MarkovGenerator.overall]
        if total <= 0:
            # randint(0, -1) would raise; treat "no observations" as a dead end.
            return None

        # Pick a position in [0, total) and walk the counts until it falls
        # inside one follower's share.
        i = randint(0, total - 1)
        for follower, occurrences in followers.items():
            if follower is MarkovGenerator.overall:
                continue
            if i < occurrences:
                return follower
            i -= occurrences
        return None

    def scantext(self, text):
        """Tokenize ``text`` and record every adjacent pair of tokens.

        Tokens are separated by any run of whitespace, then stripped of
        surrounding punctuation. Tokens that end up empty (for example a lone
        ``"..."``) are skipped and do not break the chain.
        """
        prevtoken = None
        # A single split() pass is linear; it also treats newlines and tabs as
        # separators and never yields empty strings for repeated spaces.
        for raw in text.split():
            token = raw.strip(".,!?\"()[]{}\n")
            if not token:
                continue
            if prevtoken is not None:
                self.add(prevtoken, token)
            prevtoken = token

    def getrandomword(self):
        """Return a uniformly random word that has at least one follower.

        Raises:
            IndexError: if no transitions have been recorded yet.
        """
        return choice(list(self.distribution))

    def generate(self, n):
        """Return a string of ``n`` generated words.

        Consecutive words are joined by a space. When the current word has no
        follower, a new random word is chosen and joined with ``". "`` to
        start a new sentence. Returns an empty string when ``n <= 0`` or when
        nothing has been scanned.
        """
        if n <= 0 or not self.distribution:
            return ""

        word = self.getrandomword()
        pieces = [word]
        for _ in range(1, n):
            follower = self.getrandomfollower(word)
            if follower is None:
                # Dead end: restart from a random word as a new sentence.
                word = self.getrandomword()
                pieces.append(". ")
            else:
                word = follower
                pieces.append(" ")
            pieces.append(word)

        return "".join(pieces)

    def debug(self):
        """Print every word that has recorded followers, one per line."""
        print("\n".join(self.distribution))
|
|
|
|
|
|
def main():
    """Build a model from a text file and print 100 generated words.

    The file name is taken from the first command-line argument and defaults
    to ``test.txt`` when no argument is given.
    """
    filename = argv[1] if len(argv) > 1 else "test.txt"

    # The context manager closes the file even if reading fails.
    with open(filename, "r") as source:
        text = source.read()

    mg = MarkovGenerator()
    mg.scantext(text)
    print(mg.generate(100))
|
|
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|