#! /usr/bin/env python

class Markov:
    """Markov chain over sliceable sequences such as strings or tuples.

    Sample sequences are fed in with put(); get() then produces a new
    sequence by repeatedly choosing a recorded successor of the current
    state.  A state is a slice holding at most ``histsize`` trailing items
    of the sequence built so far, so sequences must be hashable, support
    slicing and support ``+=`` concatenation.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- maximum number of preceding items that make up a state.
        choice -- callable that picks one element from a non-empty list
                  (for example random.choice).
        """
        self.histsize = histsize
        self.choice = choice
        # Maps a state (None for the start state, otherwise a history slice)
        # to the list of successors observed after it.  Duplicates are kept,
        # so the list reflects how often each successor was seen.
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed following ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Record every transition contained in the sample sequence seq.

        The start state (None) leads to an empty sequence of seq's type,
        each item is recorded as a one-item slice following the up to
        ``histsize`` items before it, and the final state leads to None,
        which marks the end of a sequence.
        """
        n = self.histsize
        add = self.add
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0.  When seq is shorter than the history
        # size, a negative index would select only the last n - len(seq)
        # items, so the end marker would be filed under a state that get()
        # never looks up and get() would fail with KeyError.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the chain.

        Starts from one of the recorded start values and keeps appending a
        chosen successor of the current state until a falsy successor (the
        None end marker) is chosen.

        Raises KeyError if the start state has no recorded transitions,
        i.e. put() was never called.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                break
            seq += successor
        return seq
Guido van Rossum6930b3d1993-12-14 10:08:02 +000033
Georg Brandl2cbd09c2009-10-11 08:42:09 +000034
def test():
    """Command-line driver: learn from text input, then print random text.

    Options (taken from sys.argv):
      -0 .. -9  history size (default 2)
      -c        operate on characters (default)
      -w        operate on words
      -d        more debugging output (may be repeated)
      -q        no debugging output

    Remaining arguments are input file names; '-' or no argument means
    standard input.  Each input is split into paragraphs on blank lines and
    every paragraph is normalised to words separated by single spaces before
    it is fed to the Markov chain.  Generated paragraphs are then printed in
    an endless loop, wrapped at 72 columns and separated by blank lines.

    Exits with status 2 after printing a usage message on invalid options.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing', filename, '...')
            # Close the input even when reading is interrupted; the
            # KeyboardInterrupt handler below carries on with partial data.
            with f:
                text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Show only the states shorter than the full history size.
        for key, successors in m.trans.items():
            if key is None or len(key) < histsize:
                print(repr(key), successors)
        if histsize == 0:
            # With no history the only real state is the empty sequence,
            # whose type depends on the mode: () for words, '' for chars.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()

    limit = 72
    while True:
        data = m.get()
        words = data if do_words else data.split()
        n = 0
        for w in words:
            # Only wrap when something is already on the line; otherwise an
            # over-long first word would produce a blank line that looks
            # like a paragraph separator.
            if n and n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n += len(w) + 1
        print()
        print()
Guido van Rossum6930b3d1993-12-14 10:08:02 +0000119
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    test()