blob: 32f3e79601be60298feca0d59f591b4d9a84359d [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum03d4c261995-01-04 19:21:44 +00002
# A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)

# Reads a text file given on standard input or named as first argument, and
# generates HTML 2.0 on standard output.  Recognizes these constructions:
#
#       HTML element              pattern at the beginning of a line
#
#       section heading           (<number><period>)+<space>
#       numbered list element     <1-2 spaces>(<number><period>)+<space>
#       unnumbered list element   <0-2 spaces><hyphen or asterisk><space>
#       preformatted section      <more than two spaces>
#
# Heading level is determined by the number of (<number><period>) segments.
# Blank lines force a separation of elements; if none of the above four
# types is indicated, a new paragraph begins.  A line beginning with many
# spaces is interpreted as a continuation (instead of preformatted) after
# a list element.  Headings are anchored; paragraphs starting with "Q." are
# emphasized, and those marked with "A." get their first sentence emphasized.
#
# Hyperlinks are created from references to:
#       URLs, explicitly marked using <URL:scheme://host...>
#       other questions, of the form "question <number>(<period><number>)*"
#       sections, of the form "section <number>".
Guido van Rossum03d4c261995-01-04 19:21:44 +000026
Guido van Rossum694f7011996-09-10 17:59:15 +000027import sys, string, regex, regsub, regex_syntax
28regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
Guido van Rossum03d4c261995-01-04 19:21:44 +000029
Guido van Rossum694f7011996-09-10 17:59:15 +000030# --------------------------------------------------------- regular expressions
31orditemprog = regex.compile(' ?([1-9][0-9]*\.)+ +')
32itemprog = regex.compile(' ? ?[-*] +')
33headingprog = regex.compile('([1-9][0-9]*\.)+ +')
34prefmtprog = regex.compile(' ')
35blankprog = regex.compile('^[ \t\r\n]$')
36questionprog = regex.compile(' *Q\. +')
37answerprog = regex.compile(' *A\. +')
38sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')
Guido van Rossum03d4c261995-01-04 19:21:44 +000039
Guido van Rossum694f7011996-09-10 17:59:15 +000040mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
41 '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)
42urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
43addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
44qrefprog = regex.compile('question +([1-9](\.[0-9]+)*)')
45srefprog = regex.compile('section +([1-9][0-9]*)')
46entityprog = regex.compile('[&<>]')
Guido van Rossum03d4c261995-01-04 19:21:44 +000047
Guido van Rossum694f7011996-09-10 17:59:15 +000048# ------------------------------------------------------------ global variables
# HTML fragments emitted so far, in output order.
body = []
# Number of ordered lists currently open, and whether an unordered list
# is open (0 or 1).
ollev = 0
ullev = 0
# Tag name and accumulated text of the element being collected.
element = ''
content = ''
# Number prefix of the most recent numbered item or heading.
secnum = ''
# Text following the "Version: " header, once seen.
version = ''
Guido van Rossum03d4c261995-01-04 19:21:44 +000052
Guido van Rossum694f7011996-09-10 17:59:15 +000053# ----------------------------------------------------- for making nested lists
def dnol():
    """Open one more nesting level of ordered list.

    Increments the global ``ollev`` and appends ``'<ol>'`` to the global
    ``body``.  If the last fragment emitted is a closing ``'</li>'``, it is
    removed first so that the new list nests inside the still-open item.
    """
    global body, ollev
    ollev = ollev + 1
    # body is still empty when the first input line is a numbered item;
    # check for that so the lookup cannot raise IndexError.
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')
Guido van Rossum03d4c261995-01-04 19:21:44 +000059
def upol():
    """Close the innermost open ordered list.

    Decrements the global ``ollev`` and appends the closing markup to the
    global ``body``.  When an outer ordered list is still open afterwards,
    the enclosing list item is closed as well.
    """
    global body, ollev
    ollev = ollev - 1
    if ollev:
        closing = '</ol></li>'
    else:
        closing = '</ol>'
    body.append(closing)
Guido van Rossum03d4c261995-01-04 19:21:44 +000064
Guido van Rossum694f7011996-09-10 17:59:15 +000065# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    """Emit the pending element and optionally close open lists.

    If the global ``content`` is non-empty it is converted and appended to
    the global ``body`` wrapped in ``element`` tags, then reset to ''.
    Afterwards, a true ``clearol`` closes every open ordered list and a
    true ``clearul`` closes the open unordered list.

    NOTE(review): the source this was taken from had lost its indentation;
    the nesting below was reconstructed from the logic -- confirm.
    """
    global content, body, ollev, ullev

    if content:
        # Escape HTML metacharacters.  '&' goes first so that the entities
        # produced for '<' and '>' are not escaped a second time.
        if entityprog.search(content) >= 0:
            for raw, entity in [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]:
                content = regsub.gsub(raw, entity, content)

        skip = questionprog.match(content)
        if skip > 0:
            # Drop the "Q." marker and emphasize the question.
            content = '<em>' + content[skip:] + '</em>'
            if ollev:
                # Inside an ordered list: link to the anchor named after
                # the item's number (question reference in index).
                fragid = regsub.gsub('^ +|\.? +$', '', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':
            # Heading in the main text: keep the number and anchor the title.
            fragid = regsub.gsub('^ +|\.? +$', '', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        skip = answerprog.match(content)
        if skip > 0:
            # Answer paragraph: drop the "A." marker and make the first
            # sentprog match strong.
            content = regsub.sub(sentprog, '<strong>\\1</strong>',
                                 content[skip:])

        # The closing tag is a separate entry so that dnol() can remove a
        # trailing '</li>' when nesting a list inside the item.
        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    if clearol:
        while ollev:
            upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0
96
97# ---------------------------------------------------------------- main program
# Read from the file named by the first argument, or from standard input
# when no (non-empty) argument is given.
# NOTE(review): block nesting below was reconstructed from a source that had
# lost its indentation -- confirm.
if len(sys.argv) > 1 and sys.argv[1]:
    faq = open(sys.argv[1])
else:
    faq = sys.stdin
lines = faq.readlines()

for line in lines:
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    width = orditemprog.match(line)
    if width > 0:                           # make ordered list item
        spew(0, 'clear ul')
        secnum = line[:width]
        # One nesting level per <number><period> segment.
        level = string.count(secnum, '.')
        while level > ollev:
            dnol()
        while level < ollev:
            upol()
        element = 'li'
        content = line[width:]
        continue

    width = itemprog.match(line)
    if width > 0:                           # make unordered list item
        spew('clear ol', 0)
        if ullev == 0:
            body.append('<ul>')
            ullev = 1
        element = 'li'
        content = line[width:]
        continue

    width = headingprog.match(line)
    if width > 0:                           # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:width]
        sys.stderr.write(line)              # echo each heading to stderr
        depth = string.count(secnum, '.')
        element = 'h%d' % depth
        content = line[width:]
        continue

    width = 0
    if not secnum:                          # haven't hit body yet
        # Mail header lines are shown preformatted; remember the version.
        width = mailhdrprog.match(line)
        if version:
            vstart = -1
        else:
            vstart = regex.match('Version: ', line)
        if vstart > 0 and not version:
            version = line[vstart:]
    if width <= 0 and element != 'li':      # not pre if after a list item
        width = prefmtprog.match(line)
    if width > 0:                           # make preformatted element
        if element != 'pre':
            spew('clear ol', 'clear ul')
            element = 'pre'
            content = line
        else:
            content = content + line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element = 'p'
        content = line

spew()                                      # output last element
156
157body = string.joinfields(body, '')
158body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
159body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
160body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
161body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)
162
163print '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>'
164print '<head><title>Python Frequently-Asked Questions v' + version
165print "</title></head><body>(This file was generated using Ping's"
166print '<a href="faq2html.py">faq2html.py</a>.)'
167print body + '</body></html>'