one-file-projects/markovfun.py

#!/usr/bin/env python
from sys import argv
from random import randint, choice

class MarkovGenerator:
    """First-order Markov chain over words.

    ``distribution`` maps each word to a dict of ``{follower: count}``.
    That inner dict also holds one extra entry, keyed by the sentinel
    ``MarkovGenerator.overall``, with the total number of observed
    followers for the word (i.e. the sum of all the other counts).
    """

    # Unique sentinel key for the per-word total; a bare object() can never
    # collide with a real (string) token.
    overall = object()

    def __init__(self):
        """Create a generator with an empty model."""
        self.distribution = {}

    def add(self, first, second):
        """Record one observation of ``second`` directly following ``first``."""
        followers = self.distribution.setdefault(
            first, {MarkovGenerator.overall: 0}
        )
        followers[MarkovGenerator.overall] += 1
        followers[second] = followers.get(second, 0) + 1

    def getrandomfollower(self, word):
        """Return a random follower of ``word``, weighted by observed count.

        Returns ``None`` when ``word`` has no recorded followers -- for
        example the last word of a scanned text, or a word never seen.
        """
        followers = self.distribution.get(word)
        if not followers:
            # Dead end: the word was never seen as a predecessor.
            return None
        total = followers[MarkovGenerator.overall]
        if total <= 0:
            return None

        # Pick a position in [0, total) and walk the cumulative counts until
        # the position falls inside one follower's share.
        i = randint(0, total - 1)
        for follower, occurances in followers.items():
            if follower is MarkovGenerator.overall:
                continue
            if i < occurances:
                return follower
            i -= occurances
        return None

    def scantext(self, text):
        """Feed every pair of consecutive words in ``text`` into the model.

        Words are separated by any run of whitespace (spaces, tabs,
        newlines). Leading/trailing punctuation is stripped from each word,
        and words that consist only of such punctuation are skipped.
        """
        strip_chars = ".,!?\"()[]{}\n"
        prevtoken = None
        # str.split() with no argument splits on whitespace runs and never
        # yields empty strings; a single pass keeps this linear in len(text).
        for raw in text.split():
            token = raw.strip(strip_chars)
            if not token:
                continue
            if prevtoken is not None:
                self.add(prevtoken, token)
            prevtoken = token

    def getrandomword(self):
        """Return a uniformly random word that has at least one follower.

        Raises ``IndexError`` if the model is empty.
        """
        return choice(list(self.distribution.keys()))

    def generate(self, n):
        """Generate text of ``n`` words (at least one word if the model is
        non-empty).

        Starts from a random word and repeatedly appends a random follower.
        When the current word has no follower, a new random word is started
        after a ``". "`` separator. Returns ``""`` for an empty model.
        """
        if not self.distribution:
            return ""

        word = self.getrandomword()
        pieces = [word]
        for _ in range(1, n):
            word = self.getrandomfollower(word)
            if word is None:
                word = self.getrandomword()
                pieces.append(". " + word)
            else:
                pieces.append(" " + word)
        return "".join(pieces)

    def debug(self):
        """Print every word that has recorded followers, one per line."""
        print("\n".join(self.distribution.keys()))


def main():
    """Build a Markov model from a text file and print 100 generated words.

    The input file is taken from the first command-line argument, falling
    back to ``test.txt`` when none is given.
    """
    filename = argv[1] if len(argv) > 1 else "test.txt"
    # Context manager guarantees the file handle is closed after reading.
    with open(filename, "r") as infile:
        text = infile.read()
    mg = MarkovGenerator()
    mg.scantext(text)
    print(mg.generate(100))


# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()