Blame - Misc/faq2html.py - platform/external/python/cpython3

Guido van Rossum

f06ee5f

1996-11-27 19:52:01 +0000

[diff] [blame]

1

#! /usr/bin/env python

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

2

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

3

# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

4

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

5

# Reads a text file given on standard input or named as first argument, and

6

# generates HTML 2.0 on standard output. Recognizes these constructions:

7

#

8

#     HTML element              pattern at the beginning of a line
#     ------------              ----------------------------------
#     section heading           (<number><period>)+<space>
#     numbered list element     <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element   <0-2 spaces><hyphen or asterisk><space>
#     preformatted section      <more than two spaces>

14

#

15

# Heading level is determined by the number of (<number><period>) segments.

16

# Blank lines force a separation of elements; if none of the above four

17

# types is indicated, a new paragraph begins. A line beginning with many

18

# spaces is interpreted as a continuation (instead of preformatted) after

19

# a list element. Headings are anchored; paragraphs starting with "Q." are

20

# emphasized, and those marked with "A." get their first sentence emphasized.

21

#

22

# Hyperlinks are created from references to:

23

# URLs, explicitly marked using <URL:scheme://host...>

24

# other questions, of the form "question <number>(<period><number>)*"

25

# sections, of the form "section <number>".

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

26

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

27

import sys, string, regex, regsub, regex_syntax

28

regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

29

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

30

# --------------------------------------------------------- regular expressions

31

# Patterns that classify a line by how it begins; the main loop applies each
# with .match(line).  Leading blanks whose exact count matters are written as
# [ ] so the number of spaces is unambiguous:
#   orditemprog  - one or two spaces, then "1." / "1.2." ..., then spaces
#   itemprog     - zero to two spaces, then "-" or "*", then spaces
#   headingprog  - "1." / "1.2." ... at column 0, then spaces
#   prefmtprog   - three leading spaces ("more than two")
# An ordered item must require at least one leading space; otherwise every
# heading line would already match orditemprog and never reach headingprog.
orditemprog = regex.compile('[ ][ ]?([1-9][0-9]*\.)+ +')
itemprog = regex.compile(' ? ?[-*] +')
headingprog = regex.compile('([1-9][0-9]*\.)+ +')
prefmtprog = regex.compile('[ ][ ][ ]')
blankprog = regex.compile('^[ \t\r\n]$')
# "Q. " / "A. " paragraph markers, and the span treated as the first sentence
# of an answer (used by spew() to wrap it in <strong>).
questionprog = regex.compile(' *Q\. +')
answerprog = regex.compile(' *A\. +')
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

# Mail/news header fields recognised before the first section number is seen.
mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)
# Patterns applied to the joined output to create hyperlinks.
# NOTE(review): the [^&] terminators in urlprog/addrprog suggest they were
# written to match entity-escaped brackets; as written they match literal
# "<" and ">", which agrees with what spew() currently emits.  If spew()'s
# escaping is changed, these two patterns must change with it.
urlprog = regex.compile('<URL:([^&]+)>')
addrprog = regex.compile('<([^>@:]+@[^&@:]+)>')
qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
# Characters that make spew() run its entity-replacement step.
entityprog = regex.compile('[&<>]')

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

47

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

48

# ------------------------------------------------------------ global variables

49

# HTML fragments produced so far; joined into one string after all input
# has been processed.
body = []

# Nesting depth of the ordered lists currently open, and a 0/1 flag telling
# whether an unordered list is currently open.
ollev = 0
ullev = 0

# Tag name and accumulated text of the element being collected, the most
# recent section-number prefix, and the version string taken from the
# "Version: " header line.
element = ''
content = ''
secnum = ''
version = ''

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

52

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

53

# ----------------------------------------------------- for making nested lists

def dnol():
    """Open one more nesting level of ordered list in the output.

    Increments the global ``ollev`` and appends '<ol>' to ``body``.  When the
    most recent fragment is a closing '</li>', it is removed first so that the
    new list nests inside that item; upol() emits the matching '</li>' again
    when the nested list is closed.
    """
    global body, ollev
    ollev = ollev + 1
    # body is still empty when the very first input line is a numbered item,
    # so check for that before looking at the last fragment (avoids an
    # IndexError on body[-1]).
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

59

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

def upol():
    """Close the innermost open ordered list.

    Decrements the global ``ollev`` and appends the closing markup to
    ``body``: '</ol></li>' while an enclosing ordered list is still open
    (the nested list lived inside a list item), plain '</ol>' otherwise.
    """
    global body, ollev
    ollev = ollev - 1
    if ollev:
        closing = '</ol></li>'
    else:
        closing = '</ol>'
    body.append(closing)

Guido van Rossum

03d4c26

1995-01-04 19:21:44 +0000

[diff] [blame]

64

Guido van Rossum

694f701

1996-09-10 17:59:15 +0000

[diff] [blame]

65

# --------------------------------- output one element and convert its contents

66

def spew(clearol=0, clearul=0):
    """Emit the pending element into ``body`` and optionally close lists.

    If the global ``content`` is non-empty it is converted and appended to
    ``body`` wrapped in the tag named by the global ``element``; ``content``
    is then reset to ''.  Conversion steps, in order:

      * entity replacement when the text contains '&', '<' or '>';
      * text starting with "Q." is wrapped in <em>, and additionally turned
        into a link to its section when inside an ordered list;
      * for heading elements, the section number is prepended and the text is
        wrapped in a named anchor;
      * text starting with "A." gets its first sentence wrapped in <strong>.

    clearol -- if true, close every open ordered list afterwards.
    clearul -- if true, close the open unordered list (if any) afterwards.
    """
    global content, body, ollev, ullev

    if content:
        text = content

        if entityprog.search(text) > -1:
            # NOTE(review): each replacement string below is identical to the
            # character it replaces, so this step currently leaves the text
            # unchanged.  It looks as if the entity names (&amp; &lt; &gt;)
            # were lost.  urlprog and addrprog match literal '<' ... '>' in
            # the joined output, so restoring the entities here requires
            # updating those two patterns in the same change.
            text = regsub.gsub('&', '&', text)
            text = regsub.gsub('<', '<', text)
            text = regsub.gsub('>', '>', text)

        qlen = questionprog.match(text)
        if qlen > 0:
            text = '<em>' + text[qlen:] + '</em>'
            if ollev:
                # Question listed in the index: link it to its section.
                # The fragment id is secnum without leading blanks and
                # without its trailing period/blanks.
                fragid = regsub.gsub('^ +|\.? +$', '', secnum)
                text = '<a href="#%s">%s</a>' % (fragid, text)

        if element[0] == 'h':
            # Heading in the main text: anchor it under the same fragment id.
            fragid = regsub.gsub('^ +|\.? +$', '', secnum)
            text = secnum + '<a name="%s">%s</a>' % (fragid, text)

        alen = answerprog.match(text)
        if alen > 0:
            # Answer paragraph: emphasize its first sentence.
            text = regsub.sub(sentprog, '<strong>\\1</strong>', text[alen:])

        body.append('<' + element + '>' + text)
        body.append('</' + element + '>')
        content = ''

    if clearol:
        while ollev:
            upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0

96

97

# ---------------------------------------------------------------- main program

98

# Read the FAQ text from the file named by the first command-line argument,
# or from standard input when no (non-empty) argument is given.
if len(sys.argv) > 1 and sys.argv[1]:
    faq = open(sys.argv[1])
else:
    faq = sys.stdin
lines = faq.readlines()

# Classify each line by its beginning.  The order of the tests matters:
# rule line, ordered item, unordered item, heading, mail header/preformatted,
# and finally blank line / continuation / new paragraph.
for line in lines:
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    n = orditemprog.match(line)
    if n > 0:                               # make ordered list item
        spew(0, 'clear ul')
        secnum = line[:n]
        # One period per numbering segment gives the nesting depth.
        level = string.count(secnum, '.')
        while level > ollev: dnol()
        while level < ollev: upol()
        element, content = 'li', line[n:]
        continue

    n = itemprog.match(line)
    if n > 0:                               # make unordered list item
        spew('clear ol', 0)
        if ullev == 0:
            body.append('<ul>')
            ullev = 1
        element, content = 'li', line[n:]
        continue

    n = headingprog.match(line)
    if n > 0:                               # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:n]
        sys.stderr.write(line)              # echo each heading to stderr
        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
        continue

    n = 0
    if not secnum:                          # haven't hit body yet
        n = mailhdrprog.match(line)
        if not version:                     # keep only the first Version line
            v = regex.match('Version: ', line)
            if v > 0:
                version = line[v:]
    if n <= 0 and element != 'li':          # not pre if after a list item
        n = prefmtprog.match(line)
    if n > 0:                               # make preformatted element
        if element == 'pre':
            content = content + line
        else:
            spew('clear ol', 'clear ul')
            element, content = 'pre', line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element, content = 'p', line

# Output the last element and close any list still open at end of input;
# without the clear flags a document ending inside a list would be left
# with unbalanced <ol>/<ul> tags.
spew('clear ol', 'clear ul')

# Join the fragments, then turn URL, address, question and section
# references into hyperlinks.
body = string.joinfields(body, '')
body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)

# Write the document.  Each write ends with the newline that a print
# statement would have added, so the output text is the same.
emit = sys.stdout.write
emit('<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>' + '\n')
emit('<head><title>Python Frequently-Asked Questions v' + version + '\n')
emit("</title></head><body>(This file was generated using Ping's" + '\n')
emit('<a href="faq2html.py">faq2html.py</a>.)' + '\n')
emit(body + '</body></html>' + '\n')