blob: 3dc57ce63af67e5bef48a3181630e1c6a363c311 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum6930b3d1993-12-14 10:08:02 +00002
class Markov:
    """Markov-chain model over sliceable, hashable sequences.

    ``histsize`` is the maximum number of trailing items used as the
    lookup state; ``choice`` is a callable that picks one element from a
    non-empty list (e.g. ``random.choice``).  ``trans`` maps each state
    to the list of continuations observed after it.  The key ``None`` is
    the start state, and a continuation of ``None`` marks the end of a
    sequence.
    """

    def __init__(self, histsize, choice):
        """Store the history size and chooser; start with no transitions."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record ``next`` as one more continuation observed after ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sequence into the transition table."""
        width = self.histsize
        length = len(seq)
        # The start state leads to an empty sequence of the same type as seq.
        self.add(None, seq[:0])
        for pos in range(length):
            start = pos - width
            if start < 0:
                start = 0
            # State: up to `width` items before pos; continuation: item at pos
            # (kept as a length-1 slice so it can be concatenated later).
            self.add(seq[start:pos], seq[pos:pos + 1])
        # The final state leads to the end-of-sequence marker.
        self.add(seq[length - width:], None)

    def get(self):
        """Generate and return one sequence by walking the table."""
        pick = self.choice
        table = self.trans
        width = self.histsize
        result = pick(table[None])
        while True:
            tail_start = max(0, len(result) - width)
            follower = pick(table[result[tail_start:]])
            # None (or any empty continuation) terminates the walk.
            if not follower:
                return result
            result = result + follower
Guido van Rossum6930b3d1993-12-14 10:08:02 +000032
def test():
    """Command-line driver: train a Markov model and print random text.

    Options and file names are taken from ``sys.argv[1:]``.  Each input
    file (``-`` means stdin) is split into paragraphs on blank lines;
    every paragraph is split on whitespace and fed to the model either
    as a tuple of words (``-w``) or as a single space-joined string
    (``-c``, the default).  After training, generated paragraphs are
    printed in an endless loop, wrapped at 72 columns.

    Exits with status 2 after printing the usage text when the options
    cannot be parsed.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        # 'q' has to be listed here, otherwise getopt rejects the -q option
        # that the usage text documents and the loop below handles.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        print('Usage: markov [-#] [-cddqw] [file] ...')
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        # opts was never assigned, so there is nothing sensible to continue with.
        sys.exit(2)

    histsize = 2
    do_words = 0
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        if o == '-c':
            do_words = 0
        if o == '-d':
            debug = debug + 1
        if o == '-q':
            debug = 0
        if o == '-w':
            do_words = 1
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing', filename, '...')
                # stdin is read but deliberately left open.
                text = sys.stdin.read()
            else:
                with open(filename, 'r') as f:
                    if debug:
                        print('processing', filename, '...')
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        # tuple here is this module's function of that name.
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')

    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')
    if debug > 1:
        for key in m.trans:
            if key is None or len(key) < histsize:
                print(repr(key), m.trans[key])
        if histsize == 0:
            # With no history the only non-start state is the empty
            # sequence, whose type depends on the mode.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()

    # Emit generated paragraphs until the process is interrupted.
    while True:
        data = m.get()
        words = data if do_words else data.split()
        n = 0  # columns used on the current output line
        limit = 72
        for w in words:
            if n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n = n + len(w) + 1
        print()
        print()
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000109
def tuple(list):
    """Return the items of ``list`` as a tuple, in order.

    This module-level function shadows the built-in of the same name,
    so the built-in is reached through the ``builtins`` module.
    """
    import builtins
    return builtins.tuple(list)
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000115
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    test()