Blame - Demo/scripts/markov.py - platform/external/python/cpython3

blob: 6bd62d0249aaa7611532f1a0c6c1f26af5e4e6bb [file] [log] [blame]

Guido van Rossum	f06ee5f	1996-11-27 19:52:01 +0000	[diff] [blame]	1	#! /usr/bin/env python
Guido van Rossum	6930b3d	1993-12-14 10:08:02 +0000	[diff] [blame]	2
				3	class Markov:
				4	def __init__(self, histsize, choice):
				5	self.histsize = histsize
				6	self.choice = choice
				7	self.trans = {}
				8	def add(self, state, next):
				9	if not self.trans.has_key(state):
				10	self.trans[state] = [next]
				11	else:
				12	self.trans[state].append(next)
				13	def put(self, seq):
				14	n = self.histsize
				15	add = self.add
				16	add(None, seq[:0])
				17	for i in range(len(seq)):
				18	add(seq[max(0, i-n):i], seq[i:i+1])
				19	add(seq[len(seq)-n:], None)
				20	def get(self):
				21	choice = self.choice
				22	trans = self.trans
				23	n = self.histsize
				24	seq = choice(trans[None])
				25	while 1:
				26	subseq = seq[max(0, len(seq)-n):]
				27	options = trans[subseq]
				28	next = choice(options)
				29	if not next: break
				30	seq = seq + next
				31	return seq
				32
				33	def test():
				34	import sys, string, whrandom, getopt
				35	args = sys.argv[1:]
				36	try:
				37	opts, args = getopt.getopt(args, '0123456789cdw')
				38	except getopt.error:
				39	print 'Usage: markov [-#] [-cddqw] [file] ...'
				40	print 'Options:'
				41	print '-#: 1-digit history size (default 2)'
				42	print '-c: characters (default)'
				43	print '-w: words'
				44	print '-d: more debugging output'
				45	print '-q: no debugging output'
				46	print 'Input files (default stdin) are split in paragraphs'
				47	print 'separated blank lines and each paragraph is split'
				48	print 'in words by whitespace, then reconcatenated with'
				49	print 'exactly one space separating words.'
				50	print 'Output consists of paragraphs separated by blank'
				51	print 'lines, where lines are no longer than 72 characters.'
				52	histsize = 2
				53	do_words = 0
				54	debug = 1
				55	for o, a in opts:
				56	if '-0' <= o <= '-9': histsize = eval(o[1:])
				57	if o == '-c': do_words = 0
				58	if o == '-d': debug = debug + 1
				59	if o == '-q': debug = 0
				60	if o == '-w': do_words = 1
				61	if not args: args = ['-']
				62	m = Markov(histsize, whrandom.choice)
				63	try:
				64	for filename in args:
				65	if filename == '-':
				66	f = sys.stdin
				67	if f.isatty():
				68	print 'Sorry, need stdin from file'
				69	continue
				70	else:
				71	f = open(filename, 'r')
				72	if debug: print 'processing', filename, '...'
				73	text = f.read()
				74	f.close()
				75	paralist = string.splitfields(text, '\n\n')
				76	for para in paralist:
				77	if debug > 1: print 'feeding ...'
				78	words = string.split(para)
				79	if words:
				80	if do_words: data = tuple(words)
				81	else: data = string.joinfields(words, ' ')
				82	m.put(data)
				83	except KeyboardInterrupt:
				84	print 'Interrupted -- continue with data read so far'
				85	if not m.trans:
				86	print 'No valid input files'
				87	return
				88	if debug: print 'done.'
				89	if debug > 1:
				90	for key in m.trans.keys():
				91	if key is None or len(key) < histsize:
				92	print `key`, m.trans[key]
				93	if histsize == 0: print `''`, m.trans['']
				94	print
				95	while 1:
				96	data = m.get()
				97	if do_words: words = data
				98	else: words = string.split(data)
				99	n = 0
				100	limit = 72
				101	for w in words:
				102	if n + len(w) > limit:
				103	print
				104	n = 0
				105	print w,
				106	n = n + len(w) + 1
				107	print
				108	print
				109
				110	def tuple(list):
				111	if len(list) == 0: return ()
				112	if len(list) == 1: return (list[0],)
				113	i = len(list)/2
				114	return tuple(list[:i]) + tuple(list[i:])
				115
				116	test()