blob: cdb49de8f499c18a4b4abaf1658747fe920e0b1b [file] [log] [blame]
Guido van Rossum03d4c261995-01-04 19:21:44 +00001#! /usr/local/bin/python
2
3# Convert the Python FAQ to HTML
4
5import string
6import regex
7import regsub
8import sys
9import os
10
# Name of the plain-text FAQ to read; main() writes FAQ + '.html'.
FAQ = 'FAQ'

# All patterns below use the syntax of the `regex` module: groups are
# written \( \) and alternation is written \|.  main() treats the integer
# returned by match()/search() as "matched" when it is >= 0.

# Chapter heading: a number, a dot and a space at the start of the line.
chapterprog = regex.compile('^\([1-9][0-9]*\)\. ')
# Question heading: "<number>.<number>. " at the start of the line.
questionprog = regex.compile('^\([1-9][0-9]*\)\.\([1-9][0-9]*\)\. ')
# Question that has no number yet: the line starts with "Q. ".
newquestionprog = regex.compile('^Q\. ')
# Line holding nothing but blanks and tabs.
blankprog = regex.compile('^[ \t]*$')
# Line that is either indented or blank.
indentedorblankprog = regex.compile('^\([ \t]+\|[ \t]*$\)')
# Row made only of '=' characters.
underlineprog = regex.compile('^==*$')
# Line indented by eight blanks, or by optional blanks followed by a tab.
# The eight blanks are spelled out as a repetition so that the count is
# explicit and survives whitespace-collapsing tools.
eightblanksprog = regex.compile('^\(' + ' ' * 8 + '\| *\t\)')
# Mail/news header line such as "Subject: ..." or "Version: ...".
mailheaderprog = regex.compile('^\(Subject\|Newsgroups\|Followup-To\|From\|Reply-To\|Approved\|Archive-name\|Version\|Last-modified\): +')
# <URL:...> marker; group 1 is the URL.
urlprog = regex.compile('<URL:\([^>]*\)>')
# Bare ampersand.
ampprog = regex.compile('&')
# "A. " leader at the start of a line.
aprog = regex.compile('^A\. +')
# "Q. " leader directly after a '>' character.
qprog = regex.compile('>Q\. +')
# Textual cross reference "question N.M"; group 1 is the N.M part.
qrefprog = regex.compile('question +\([0-9]\.[0-9]+\)')
# The "Version: " header.
versionprog = regex.compile('^Version: ')
# <user@host> address; group 1 is the address without the brackets.
emailprog = regex.compile('<\([^>@:]+@[^>@:]+\)>')
28
def _renumber(lines):
    """Rewrite headings in `lines` in place and collect the listings.

    Chapter headings become <H2> elements, question headings are
    renumbered as <chapter>.<question> and wrapped in a named anchor plus
    <H3>, and rows of '=' are replaced by empty strings.

    Returns (chapters, questions): two lists of HTML lines that start with
    '<OL>' and still lack their closing tags.
    """
    chapter = 0
    question = 0
    chapters = ['<OL>']
    questions = ['<OL>']
    total = len(lines)
    # NOTE(review): the original guarded the heading checks with a flag
    # `after_blank` that was set to 1 and never changed (the updates went
    # to a differently spelled `afterblank`), so every line has always been
    # examined.  That behaviour is kept; confirm before restricting the
    # checks to lines that follow a blank line.
    for i in range(total):
        line = lines[i]
        n = chapterprog.match(line)
        if n >= 0:
            chapter = chapter + 1
            if chapter != 1:
                # close the question list of the previous chapter
                questions.append('</UL>\n')
            question = 0
            lines[i] = '<H2>' + line[n:-1] + '</H2>\n'
            chapters.append('<LI> ' + line[n:])
            questions.append('<LI> ' + line[n:])
            questions.append('<UL>\n')
            continue
        if underlineprog.match(line) >= 0:
            lines[i] = ''
            continue
        n = questionprog.match(line)
        if n < 0:
            # "Q. " is 3 characters long, so a hit yields n == 0 and the
            # leader stays part of the heading text.
            n = newquestionprog.match(line) - 3
        if n >= 0:
            question = question + 1
            number = '%d.%d' % (chapter, question)
            lines[i] = '<A NAME="' + number + '"><H3>' + line[n:]
            questions.append('<LI><A HREF="#' + number + '">' + line[n:])
            # Add up to 4 continuations of the question.  The upper bound
            # keeps the lookahead inside the file when a question sits
            # within the last few lines.
            pad = ' ' * (len(number) + 2)
            for j in range(i + 1, min(i + 5, total)):
                if blankprog.match(lines[j]) >= 0:
                    lines[j - 1] = lines[j - 1] + '</H3></A>'
                    questions[-1] = questions[-1][:-1] + '</A>\n'
                    break
                questions.append(pad + lines[j])
            continue
    return chapters, questions


def _replace_listing(lines, header, entries):
    """Replace the plain-text listing below a header line with `entries`.

    Looks for the first line matching the regex string `header`, deletes
    the run of indented or blank lines directly below it and inserts
    `entries` at that position.  `lines` is modified in place.

    Returns 1 if the header was found, 0 otherwise (nothing is changed
    in that case).
    """
    for i in range(len(lines)):
        if regex.match(header, lines[i]) >= 0:
            start = i + 1
            # Bounded so that a listing running up to the end of the file
            # does not index past the last line.
            while start < len(lines) and \
                  indentedorblankprog.match(lines[start]) >= 0:
                del lines[start]
            lines[start:start] = entries
            return 1
    return 0


def _markup(line):
    """Return `line` with the inline text-to-HTML substitutions applied."""
    # & -> &amp;
    if ampprog.search(line) >= 0:
        line = regsub.gsub(ampprog, '&amp;', line)
    # zap the 'Q.' leader that follows a '>' and the 'A.' leader that
    # starts a line (first occurrence only)
    if qprog.search(line) >= 0:
        line = regsub.sub(qprog, '>', line)
    if aprog.search(line) >= 0:
        line = regsub.sub(aprog, '', line)
    # patch up hard refs to questions (first occurrence only)
    if qrefprog.search(line) >= 0:
        line = regsub.sub(qrefprog,
                          '<A HREF="#\\1">question \\1</A>', line)
    # make <URL:...> into actual links
    if urlprog.search(line) >= 0:
        line = regsub.gsub(urlprog, '<A HREF="\\1">\\1</A>', line)
    # make <user@host.domain> into mailto: links
    if emailprog.search(line) >= 0:
        line = regsub.gsub(emailprog,
                           '<A HREF="mailto:\\1">\\1</A>', line)
    return line


def _cleanup(lines):
    """Add <PRE>/<P> structure and inline markup to `lines` in place.

    Returns the second whitespace-separated field of the 'Version:' header
    line, or '' when no such line (or no such field) was seen.
    """
    version = ''
    doingpre = 0
    for i in range(len(lines)):
        line = lines[i]
        # set lines indented by >= 8 spaces, and mail headers, using PRE
        n = eightblanksprog.match(line)
        if n < 0:
            n = mailheaderprog.match(line)
        if n >= 0:
            if versionprog.match(line) > 0:
                fields = string.split(line)
                # guard against a 'Version:' header without a value
                if len(fields) > 1:
                    version = fields[1]
            if doingpre == 0:
                lines[i] = '<PRE>\n' + line
                doingpre = 1
                # NOTE(review): the available listing had lost its
                # indentation; this `continue` is reconstructed as applying
                # only to the line that opens the PRE block, so later PRE
                # lines still receive the substitutions below.  Confirm
                # against the original layout.
                continue
        # blank lines either terminate PRE or separate paragraphs
        if blankprog.match(line) >= 0:
            if doingpre == 1:
                lines[i] = '</PRE><P>\n'
                doingpre = 0
            else:
                lines[i] = '<P>\n'
            continue
        lines[i] = _markup(line)
    return version


def main():
    """Convert the text file named by FAQ into the HTML file FAQ + '.html'.

    Progress messages are printed to standard output.  If the header line
    that introduces the chapter or question listing cannot be found, a
    warning and the collected entries are printed and that listing is not
    inserted.
    """
    print('Reading lines...')
    f = open(FAQ, 'r')
    try:
        lines = f.readlines()
    finally:
        f.close()

    print('Renumbering in memory...')
    chapters, questions = _renumber(lines)

    print('Inserting list of chapters...')
    chapters.append('</OL>\n')
    if not _replace_listing(
            lines, '^This FAQ is divided in the following chapters',
            chapters):
        print('*** Can\'t find header for list of chapters')
        print('*** Chapters found:')
        sys.stdout.writelines(chapters)

    print('Inserting list of questions...')
    questions.append('</UL></OL>\n')
    if not _replace_listing(
            lines, '^Here.s an overview of the questions', questions):
        print('*** Can\'t find header for list of questions')
        print('*** Questions found:')
        sys.stdout.writelines(questions)

    # final cleanup
    print("Final cleanup...")
    version = _cleanup(lines)

    # The trailing blank after "using" keeps the sentence from running
    # straight into the link text in the rendered page.
    lines[0:0] = ['<HTML><HEAD><TITLE>Python Frequently Asked Questions v',
                  version,
                  '</TITLE>\n',
                  '</HEAD><body>\n',
                  '(This file was generated using ',
                  '<A HREF="faq2html.py">faq2html.py</A>.)<P>\n']
    lines.append('<P></BODY></HTML>\n')

    print('Writing html file...')
    f = open(FAQ + '.html', 'w')
    try:
        f.writelines(lines)
    finally:
        f.close()
    print('Done.')


# Runs the conversion as soon as the file is executed or imported.
main()