Blame - Demo/scripts/markov.py - platform/external/python/cpython2

1993-12-14 10:08:02 +0000

[diff] [blame]

2

3

class Markov:
    """Markov-chain model over sliceable sequences (strings or tuples).

    States are the last ``histsize`` elements of a sequence (fewer near
    the start).  ``trans`` maps each state to the list of successors that
    were observed after it:

    * the key ``None`` maps to the empty prefixes used to start a new
      sequence in ``get()``;
    * a successor of ``None`` marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model.

        histsize -- maximum number of trailing elements used as a state.
        choice   -- callable that picks one item from a non-empty list
                    (e.g. ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed following ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete sequence into the model.

        ``seq`` must support slicing and its slices must be hashable
        (they are used as dictionary keys).
        """
        n = self.histsize
        add = self.add
        # An empty slice of the same type as seq seeds get().
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0: without it, a sequence shorter than
        # histsize yields a negative index, and the slice then returns only
        # the tail of seq.  get() always looks up the clamped state, so the
        # terminator must be stored under that same key.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence from the recorded transitions.

        Raises KeyError if nothing has been fed in with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                # None is the end-of-sequence marker stored by put().
                break
            seq += successor
        return seq

Guido van Rossum

1993-12-14 10:08:02 +0000

[diff] [blame]

33

Georg Brandl

2009-10-11 08:39:16 +0000

[diff] [blame]

34

Guido van Rossum

1993-12-14 10:08:02 +0000

[diff] [blame]

35

def test():
    """Command-line driver: build a Markov model from text and print output.

    Options are read from ``sys.argv``: ``-0`` .. ``-9`` set the history
    size, ``-c``/``-w`` select character or word mode, ``-d`` raises and
    ``-q`` silences the debugging output.  Remaining arguments are input
    file names; ``-`` (the default) means standard input.

    After the input is read, generated paragraphs are printed in an endless
    loop, wrapped at 72 columns and separated by blank lines.  Exits with
    status 2 on a bad option.

    All output uses single-argument ``print(...)`` so the function produces
    the same text under the Python 2 print statement and the Python 3
    print function.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        if o == '-c':
            do_words = False
        if o == '-d':
            debug += 1
        if o == '-q':
            debug = 0
        if o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            try:
                if debug:
                    print('processing %s ...' % filename)
                text = f.read()
            finally:
                # Close files we opened even if read() fails, but leave
                # sys.stdin open so a repeated '-' argument does not hit
                # a closed file.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump only the "short" states (those reached near the start of a
        # sequence); the full-length ones would flood the output.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%s %s' % (repr(key), m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type depends on the mode: '' for characters, () for words.
            empty = () if do_words else ''
            if empty in m.trans:
                print('%s %s' % (repr(empty), m.trans[empty]))
        print('')

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        line = []
        n = 0
        for w in words:
            # Only wrap when the current line already holds something;
            # otherwise a single over-long word would emit an empty line,
            # which reads as a paragraph separator.
            if line and n + len(w) > limit:
                print(' '.join(line))
                line = []
                n = 0
            line.append(w)
            n += len(w) + 1
        print(' '.join(line))
        print('')

Guido van Rossum