| #! /usr/local/bin/python |
| |
class Markov:
    """Markov chain model over sliceable sequences (strings or tuples).

    Attributes:
        histsize: maximum number of trailing items used as the lookup state.
        choice: callable that picks one element from a non-empty list,
            e.g. ``random.choice``.
        trans: dict mapping a state to the list of successors observed
            after it.  The key ``None`` is the start state; a successor of
            ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed directly after `state`.

        `state` is used as a dictionary key, so it must be hashable.
        """
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the model.

        `seq` must support ``len()`` and slicing, and its slices must be
        hashable (strings and tuples qualify).
        """
        n = self.histsize
        add = self.add
        # The start state yields the empty sequence of the same type as seq.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0: a negative index would select only the
        # tail of a sequence shorter than the history size, and get() would
        # then never find the end marker under the state it looks up.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the recorded states.

        Raises KeyError if no sequence has been fed with put() yet, or if
        the walk reaches a state that has no recorded successors.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            # A falsy successor (the None end marker) terminates the walk.
            if not successor:
                break
            seq = seq + successor
        return seq
| |
def test():
    """Command-line driver: train a Markov model on text, then print output.

    Options and file names are taken from ``sys.argv[1:]``:

        -0 .. -9   history size (default 2)
        -c         model characters (default)
        -w         model whitespace-separated words
        -d         increase debugging output
        -q         suppress debugging output

    Each input file ('-', or no file at all, means stdin) is split into
    paragraphs at blank lines, and every non-empty paragraph is fed to a
    ``Markov`` instance defined in this module.  Afterwards generated
    paragraphs are printed in an endless loop, broken into lines of at most
    72 characters (a single longer word still gets its own line); the loop
    only ends when an exception such as KeyboardInterrupt propagates.

    Exits with status 2 after printing the usage text when the options
    cannot be parsed.  Returns None early when no input produced any data.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        # 'q' has to be listed here, otherwise the documented -q option is
        # rejected before the option loop below ever sees it.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print('Usage: markov [-#] [-cddqw] [file] ...')
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        # No options were parsed, so there is nothing to continue with.
        sys.exit(2)

    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = 0
        elif o == '-d':
            debug = debug + 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = 1
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Close files opened here even if read() fails; leave the
                # process-wide stdin stream open.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')

    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the start state and every state shorter than the history size.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%s %s' % (repr(key), m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type depends on the mode: () for words, '' for characters.
            if do_words:
                empty = ()
            else:
                empty = ''
            print('%s %s' % (repr(empty), m.trans[empty]))
        print('')

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        line = []
        width = 0
        for w in words:
            if width + len(w) > limit:
                print(' '.join(line))
                line = []
                width = 0
            line.append(w)
            width = width + len(w) + 1
        print(' '.join(line))
        # Blank line between generated paragraphs.
        print('')
| |
def tuple(list):
    """Return the items of the sequence `list`, in order, as a tuple.

    An empty input yields the empty tuple.
    """
    # This function shadows the built-in name ``tuple`` at module level, so
    # the built-in type is obtained from an empty tuple literal instead.
    # Delegating to it avoids the former ``len(list)/2`` split, which produces
    # a float (and an invalid slice index) under true division.
    return type(())(list)
| |
# Run the command-line driver only when executed as a script, so that
# importing this module does not start reading input and printing output.
if __name__ == '__main__':
    test()