blob: bddec569364c1077dcda8abc60fd4e4183aa1660 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum6930b3d1993-12-14 10:08:02 +00002
class Markov:
    """Markov chain over sliceable sequences (strings or tuples).

    ``trans`` maps a state -- a slice holding at most ``histsize`` trailing
    items of a sequence -- to the list of successors observed after that
    state.  Successors are one-item slices of the training sequence.  Two
    special entries frame every sequence: the key ``None`` lists the (empty)
    starting sequences, and a successor of ``None`` marks end-of-sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- maximum number of trailing items that form a state.
        choice   -- callable that picks one element from a non-empty list
                    (for example ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that `next` was observed right after `state`."""
        # setdefault() replaces the has_key()/else branch; has_key() no longer
        # exists on Python 3 dictionaries.
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence (a string or tuple) into the chain."""
        n = self.histsize
        add = self.add
        # seq[:0] is the empty sequence of the same type as seq; get() starts
        # from one of these.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0.  Without the clamp a sequence shorter
        # than histsize produces a negative index, which silently truncates
        # the key (e.g. "ab"[-1:] == "b") so that get() could never find the
        # terminating state and raised KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the chain.

        Raises KeyError if nothing has been fed with put() yet (there is no
        ``None`` start entry) or if a state without recorded successors is
        reached.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            # A None successor is the end-of-sequence marker stored by put().
            if not successor:
                break
            seq = seq + successor
        return seq
Guido van Rossum6930b3d1993-12-14 10:08:02 +000032
def _print_wrapped(words, limit=72):
    """Print `words` separated by single spaces, wrapped at `limit`, then a blank line.

    A new line is started when adding the next word would push the running
    width (word lengths plus one separator each) past `limit`.  A word that
    is itself longer than `limit` is printed on a line of its own.
    """
    line = []
    width = 0
    for w in words:
        # Only break when the current line already holds something, so an
        # over-long first word does not produce a stray empty line.
        if line and width + len(w) > limit:
            print(' '.join(line))
            line = []
            width = 0
        line.append(w)
        width = width + len(w) + 1
    print(' '.join(line))
    print('')


def test():
    """Command-line driver: train a Markov chain on text files and print
    generated paragraphs until interrupted.

    Options are read from sys.argv (see the usage text below).  Each input
    file is split into paragraphs on blank lines; every paragraph is fed to
    the chain either as one space-normalised string (-c, default) or as a
    tuple of words (-w).  On an option error the usage text is printed and
    the process exits with status 2.
    """
    import sys, random, getopt
    args = sys.argv[1:]
    try:
        # 'q' must be listed here, otherwise the documented -q flag is
        # rejected as an unknown option.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print('Usage: markov [-#] [-cddqw] [file] ...')
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        # Stop here: `opts` is unbound after a parse failure.
        sys.exit(2)
    histsize = 2
    do_words = 0
    debug = 1
    for o, _value in opts:
        if len(o) == 2 and o[1].isdigit():
            histsize = int(o[1])
        elif o == '-c':
            do_words = 0
        elif o == '-d':
            debug = debug + 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = 1
    if not args:
        args = ['-']
    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing %s ...' % filename)
                text = sys.stdin.read()
            else:
                f = open(filename, 'r')
                if debug:
                    print('processing %s ...' % filename)
                # Close the file even if read() fails.
                try:
                    text = f.read()
                finally:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')
    if debug > 1:
        # Dump only the short (start-of-sequence) states.
        for key in m.trans.keys():
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type depends on the mode: () for words, '' for characters.
            if do_words:
                empty = ()
            else:
                empty = ''
            print('%r %s' % (empty, m.trans[empty]))
        print('')
    # Generate paragraphs forever; the user stops the program with ^C.
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        _print_wrapped(words, 72)
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000109
def tuple(list):
    """Return the items of the sequence `list` as a tuple.

    The sequence is split in half recursively and the halves concatenated.
    NOTE: this module-level definition shadows the builtin tuple() for the
    rest of the file.
    """
    if len(list) == 0:
        return ()
    if len(list) == 1:
        return (list[0],)
    # Floor division: a plain "/" yields a float on Python 3, which is not
    # a valid slice index.
    i = len(list) // 2
    return tuple(list[:i]) + tuple(list[i:])
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000115
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test()