blob: df4dec0c5e53782800ec1b9291f8010164b8726f [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum6930b3d1993-12-14 10:08:02 +00002
class Markov:
    """Markov chain over sliceable sequences such as strings or tuples.

    put() records, for every position of a sequence, which item followed
    the preceding history window of at most ``histsize`` items.  get()
    replays those recorded transitions, picking among them with the
    ``choice`` callable, to build a new sequence.

    The sequences must support slicing and ``+``, and their slices must be
    hashable because they are used as dictionary keys.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- maximum number of trailing items that form a state.
        choice   -- callable that takes a list and returns one of its
                    elements (for example ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        # Maps a state to the list of successors recorded after it.
        # The key None is the start state; a successor of None marks the
        # end of a sequence.
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was seen immediately after ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sequence into the chain."""
        n = self.histsize
        add = self.add
        # Start marker: the empty slice has the same type as ``seq`` so
        # that get() can extend it with ``+=``.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # End marker.  The start index must be clamped at 0: a negative
        # index would select only a suffix of a sequence shorter than the
        # history size, and get() would then look up a state that was
        # never recorded.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Return a new sequence generated from the recorded transitions.

        Raises KeyError if nothing has been fed to the chain yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                # None is the end-of-sequence marker.
                break
            seq += successor
        return seq
Guido van Rossum6930b3d1993-12-14 10:08:02 +000033
Georg Brandl86d38e92009-10-11 08:39:16 +000034
def test():
    """Command-line driver: learn from text files, then babble forever.

    Options and input file names are taken from sys.argv; a bad option
    prints the usage text and exits with status 2.  Each input is split
    into paragraphs on blank lines, each paragraph is fed to a Markov
    chain either character by character or word by word, and generated
    paragraphs are then written to stdout in an endless loop.

    Messages use print(...) with a single argument and the word output
    uses sys.stdout.write, so the code produces the same output whether
    print is a statement or a function.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            # Close the file even if reading fails or is interrupted.
            try:
                text = f.read()
            finally:
                f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the start state and the states shorter than the history
        # size.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # No key is shorter than 0, so show the single empty state.
            # Its type follows the input mode: () for words, '' for
            # characters.
            if do_words:
                empty = ()
            else:
                empty = ''
            print('%r %s' % (empty, m.trans[empty]))
        print('')

    write = sys.stdout.write
    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        # col is the width already used on the current line, counting one
        # separator per word; 0 means nothing has been written on it yet.
        col = 0
        for w in words:
            if col + len(w) > limit:
                write('\n')
                col = 0
            if col:
                write(' ')
            write(w)
            col += len(w) + 1
        # End the last line and leave a blank line between paragraphs.
        write('\n\n')
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000119
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()