blob: 970ed34e3689354890d66a372c1fa404a2ff5de0 [file] [log] [blame]
"""
    Makes the necessary files to convert from plain html of
    Python 1.5 and 1.5.x Documentation to
    Microsoft HTML Help format version 1.1
    Doesn't change the HTML docs themselves.

    by hernan.foffani@iname.com
    no copyright and no responsibilities.

    modified by Dale Nagata for Python 1.5.2

    Renamed from make_chm.py to prechm.py, and checked into the Python
    project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim
    and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the
    Python 2.2 docs, at <http://alldunn.com/python/>.
"""
Tim Peters52cfa332002-04-19 16:09:26 +000017
18import sys
19import os
Tim Peterse21095e2002-04-20 08:36:42 +000020from formatter import NullWriter, AbstractFormatter
21from htmllib import HTMLParser
Tim Peters52cfa332002-04-19 16:09:26 +000022import getopt
Tim Peters45454072002-04-20 20:26:26 +000023import cgi
Tim Peters52cfa332002-04-19 16:09:26 +000024
# Help text printed by usage().  Keep it in sync with the option handling
# in do_it(): -v is mandatory (there is no default version), and -p skips
# both the project file and the stop list.
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] -v VERSION filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File) or filename.stp
        (full text search stop list)
    -v VERSION: makes help for the given version of the Python docs
        (required; one of 1.5, 1.5.1, 1.5.2, 2.0.0, 2.1.1, 2.2)
'''
33
# Project file (*.hhp) template.  It is filled in with a mapping that
# supplies 'arch', the file basename (like the pythlp in pythlp.hhp), and
# 'version', the doc version number (like the 2.2 in Python 2.2).
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).
# The backslash ending the first line under [WINDOWS] is a line
# continuation inside the string literal, so that window definition is
# written out as a single line.
# The [FILES] section is deliberately left empty here: do_project() writes
# this template first and then appends one file path per line.
project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation

[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x384e,[271,372,740,718],,,,,,,0

[FILES]
'''
58
# Fixed text that starts the table of contents (*.hhc) file; do_content()
# writes it before the per-book entries.  It opens the outermost <UL>,
# which contents_footer closes again.  The backslash right after the
# opening quotes keeps the string from starting with a blank line.
contents_header = '''\
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
 <param name="Window Styles" value="0x801227">
 <param name="ImageType" value="Folder">
</OBJECT>
<UL>
'''
72
# Fixed text that ends the table of contents (*.hhc) file; do_content()
# writes it last.  It closes the <UL>, <BODY> and <HTML> elements that
# contents_header opened.
contents_footer = '''\
</UL></BODY></HTML>
'''
76
# Template for a single sitemap entry.  It takes a 2-tuple: the first
# value becomes the "Name" param (the entry's title) and the second the
# "Local" param (the path of the page the entry points at).  Both values
# land inside double-quoted HTML attributes, so callers are responsible
# for escaping them.
object_sitemap = '''\
<OBJECT type="text/sitemap">
 <param name="Name" value="%s">
 <param name="Local" value="%s">
</OBJECT>
'''
83
# List of words the full text search facility shouldn't index.  This
# becomes file ARCH.stp.  Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
# The layout of this string does not matter: do_it() splits it on
# whitespace, sorts the words, and writes them one per line.
stop_list = '''
a an and
is
near
not
of
or
the
'''
99
# s is a string or None.  A false value (None or the empty string) is
# handed back unchanged, as is a name that already ends in '.html'.
# Anything else is returned with '.html' tacked on to the end.
def addhtml(s):
    if not s or s.endswith('.html'):
        return s
    return s + '.html'
107
# Convenience class to hold info about "a book" in HTMLHelp terms == a doc
# directory in Python terms.
#   directory   -- directory holding the book's pages
#   title       -- title used for the book's table of contents entry
#   firstpage   -- page the book's table of contents entry points at
#   contentpage -- page parsed to build the book's table of contents
#                  (None if there isn't one)
#   indexpage   -- page parsed to build the book's index entries
#                  (None if there isn't one)
# The three page names may be given with or without a trailing '.html';
# they are stored after normalization by addhtml().
class Book:
    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        pages = [addhtml(page)
                 for page in (firstpage, contentpage, indexpage)]
        self.firstpage, self.contentpage, self.indexpage = pages
118
# Library Doc list of books, keyed by documentation version string (the
# value given to the -v option).  Each value is the list of books for that
# version, each built as
#     Book(Dir, Title, First page, Content page, Index page)
# where the last two are optional.
supported_libraries = {
    '2.2':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'contents'),
        Book('dist', 'Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut', 'Tutorial', 'tut', 'contents'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut', 'Tutorial', 'tut', 'node1'),
        Book('lib', 'Library Reference', 'lib', 'node1', 'node268'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'node1'),
        Book('api', 'Python/C API', 'api', 'node1', 'node48'),
    ],
}
200
# AlmostNullWriter produces no output of its own.  Its only job is to
# remember the text passed to send_flowing_data(), so that the parser can
# later retrieve whatever appeared between an anchor begin/end pair (e.g.
# the text of a TOC entry).

class AlmostNullWriter(NullWriter):

    def __init__(self):
        NullWriter.__init__(self)
        # Start out with nothing remembered.
        self.saved_clear()

    # Remember a piece of text, minus surrounding whitespace.  Pieces that
    # are nothing but whitespace are dropped.
    def send_flowing_data(self, data):
        text = data.strip()
        if not text:
            return
        self.saved.append(text)

    # Throw away everything remembered so far.
    def saved_clear(self):
        self.saved = []

    # Return everything remembered so far as one string, with the pieces
    # separated by single spaces.
    def saved_get(self):
        pieces = self.saved
        return ' '.join(pieces)
223
# Base parser shared by the index and table-of-contents generators.  It
# reads one HTML page and, for every anchor found inside a <DL> group,
# writes an object_sitemap entry (anchor text + link target) to the output
# file.  Anything outside such a group (headers, footers, etc) is ignored.
class HelpHtmlParser(HTMLParser):

    # formatter -- formatter whose writer offers saved_clear()/saved_get()
    # path      -- relative path prepended to every href that is written
    # output    -- file object the entries are written to
    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        self.path = path
        self.ft = output
        # Current nesting depth == number of tabs used to pretty print.
        self.indent = 0
        # True only while inside a group, i.e. while actively processing.
        self.proc = False
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # Hrefs of the anchors that have begun but not yet ended.
        self.hrefstack = []

    # Enter a (possibly nested) group; this switches processing on.
    def begin_group(self):
        self.indent = self.indent + 1
        self.proc = True

    # Leave a group; processing stops once we're back at the top level.
    def finish_group(self):
        self.indent = self.indent - 1
        self.proc = self.indent > 0

    # An anchor starts: forget previously captured text and remember the
    # target.  Ignored while not processing.
    def anchor_bgn(self, href, name, type):
        if not self.proc:
            return
        self.saved_clear()
        self.hrefstack.append(href)

    # An anchor ends: emit a sitemap entry built from the captured text
    # and the most recently remembered target.  Ignored while not
    # processing.
    def anchor_end(self):
        if not self.proc:
            return
        title = cgi.escape(self.saved_get(), True)
        target = self.hrefstack.pop()
        path = self.path + '/' + target
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # XXX index.html for the 2.2 language reference manual contains
        # XXX nested <a></a> tags in the entry for the section on blank
        # XXX lines.  We want to ignore the nested part completely, so an
        # XXX entry is written only when the outermost anchor closes.
        if not self.hrefstack:
            self.tab(object_sitemap % (title, path))

    def start_dl(self, atr_val):
        self.begin_group()

    def end_dl(self):
        self.finish_group()

    def do_dt(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

    # Write text to the output file, unchanged.
    def write(self, text):
        self.ft.write(text)

    # Write self.indent tabs to the output file, followed by text (if any).
    def tab(self, text=''):
        padding = '\t' * self.indent
        self.write(padding)
        if text:
            self.write(text)

    # Forget all text captured by the formatter's writer.
    def saved_clear(self):
        writer = self.formatter.writer
        writer.saved_clear()

    # Return all text captured by the formatter's writer, as a string.
    def saved_get(self):
        writer = self.formatter.writer
        return writer.saved_get()
Tim Peters52cfa332002-04-19 16:09:26 +0000289
# Parser used for index pages.  The behavior inherited from HelpHtmlParser
# is all that's needed, so nothing is overridden here.
class IdxHlpHtmlParser(HelpHtmlParser):
    pass
293
# Parser used for table of contents pages.  On top of what HelpHtmlParser
# does, it mirrors the page's list nesting in the output: both <DL> and
# <UL> groups are written out as <UL> ... </UL>, and <LI> items are
# passed along.
class TocHlpHtmlParser(HelpHtmlParser):

    # Enter a group, then write the opening tag at the new (deeper) level.
    def _open_list(self):
        self.begin_group()
        self.tab('<UL>\n')

    # Leave a group, then write the closing tag at the outer level.
    def _close_list(self):
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
Tim Peters52cfa332002-04-19 16:09:26 +0000315
# Parse one book's index page and write its index entries to output.
#   path      -- the book's directory; also prepended to every link written
#   indexpage -- file name of the index page inside that directory
#   output    -- file object receiving the entries
# Errors from opening or reading the page propagate to the caller.
def index(path, indexpage, output):
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    # Make sure the page gets closed even if parsing blows up.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000323
# Parse one book's contents page and write its table of contents entries
# to output.
#   path        -- the book's directory; also prepended to every link
#                  written
#   contentpage -- file name of the contents page inside that directory
#   output      -- file object receiving the entries
# Errors from opening or reading the page propagate to the caller.
def content(path, contentpage, output):
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    # Make sure the page gets closed even if parsing blows up.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000331
Tim Peters661e4922002-04-20 02:39:44 +0000332def do_index(library, output):
Tim Peters52cfa332002-04-19 16:09:26 +0000333 output.write('<UL>\n')
Tim Peters661e4922002-04-20 02:39:44 +0000334 for book in library:
Tim Petersd9a10502002-04-20 03:25:02 +0000335 print '\t', book.title, '-', book.indexpage
336 if book.indexpage:
337 index(book.directory, book.indexpage, output)
Tim Peters52cfa332002-04-19 16:09:26 +0000338 output.write('</UL>\n')
339
Tim Peters661e4922002-04-20 02:39:44 +0000340def do_content(library, version, output):
Tim Petersc8490c72002-04-20 21:34:34 +0000341 output.write(contents_header)
Tim Peters661e4922002-04-20 02:39:44 +0000342 for book in library:
Tim Petersd9a10502002-04-20 03:25:02 +0000343 print '\t', book.title, '-', book.firstpage
Tim Peters45454072002-04-20 20:26:26 +0000344 path = book.directory + "/" + book.firstpage
345 output.write('<LI>')
346 output.write(object_sitemap % (book.title, path))
Tim Petersd9a10502002-04-20 03:25:02 +0000347 if book.contentpage:
348 content(book.directory, book.contentpage, output)
Tim Peters52cfa332002-04-19 16:09:26 +0000349 output.write(contents_footer)
350
# Write the project (*.hhp) file: the filled-in project_template followed
# by the contents of its [FILES] section.
#   library -- the list of Book objects from supported_libraries for the
#              version of the docs getting generated
#   output  -- file object receiving the project file
#   arch    -- file basename substituted into the template
#   version -- doc version string substituted into the template
# Every .html and .css file found in a book's directory is listed once, as
# directory\file, even when several books share the same directory.
def do_project(library, output, arch, version):
    # Name the template values explicitly rather than passing locals(), so
    # renaming a local variable can't silently break the template.
    output.write(project_template % {'arch': arch, 'version': version})
    pathseen = {}
    for book in library:
        directory = book.directory
        for page in os.listdir(directory):
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: the directory name is data, not a
                # format string, so a '%' in it must not be interpreted.
                fullpath = directory + '\\' + page + '\n'
                if fullpath not in pathseen:
                    output.write(fullpath)
                    pathseen[fullpath] = True
Tim Peters52cfa332002-04-19 16:09:26 +0000366
Tim Peters661e4922002-04-20 02:39:44 +0000367def openfile(file):
368 try:
Tim Peters52cfa332002-04-19 16:09:26 +0000369 p = open(file, "w")
Tim Peters661e4922002-04-20 02:39:44 +0000370 except IOError, msg:
Tim Peters52cfa332002-04-19 16:09:26 +0000371 print file, ":", msg
372 sys.exit(1)
373 return p
374
Tim Peters661e4922002-04-20 02:39:44 +0000375def usage():
Tim Peters52cfa332002-04-19 16:09:26 +0000376 print usage_mode
377 sys.exit(0)
378
Tim Peters661e4922002-04-20 02:39:44 +0000379def do_it(args = None):
380 if not args:
Tim Peters52cfa332002-04-19 16:09:26 +0000381 args = sys.argv[1:]
382
Tim Peters661e4922002-04-20 02:39:44 +0000383 if not args:
Tim Peters52cfa332002-04-19 16:09:26 +0000384 usage()
385
Tim Peters661e4922002-04-20 02:39:44 +0000386 try:
Tim Peters52cfa332002-04-19 16:09:26 +0000387 optlist, args = getopt.getopt(args, 'ckpv:')
Tim Peters661e4922002-04-20 02:39:44 +0000388 except getopt.error, msg:
Tim Peters52cfa332002-04-19 16:09:26 +0000389 print msg
390 usage()
391
Tim Peters661e4922002-04-20 02:39:44 +0000392 if not args or len(args) > 1:
Tim Peters52cfa332002-04-19 16:09:26 +0000393 usage()
394 arch = args[0]
395
396 version = None
397 for opt in optlist:
398 if opt[0] == '-v':
399 version = opt[1]
400 break
401 if not version:
402 usage()
403
Tim Petersd9a10502002-04-20 03:25:02 +0000404 library = supported_libraries[version]
Tim Peters52cfa332002-04-19 16:09:26 +0000405
Tim Peters661e4922002-04-20 02:39:44 +0000406 if not (('-p','') in optlist):
Tim Peters4f109c12002-04-19 18:41:46 +0000407 fname = arch + '.stp'
408 f = openfile(fname)
409 print "Building stoplist", fname, "..."
410 words = stop_list.split()
411 words.sort()
412 for word in words:
413 print >> f, word
414 f.close()
415
Tim Peters52cfa332002-04-19 16:09:26 +0000416 f = openfile(arch + '.hhp')
417 print "Building Project..."
418 do_project(library, f, arch, version)
419 if version == '2.0.0':
420 for image in os.listdir('icons'):
421 f.write('icons'+ '\\' + image + '\n')
422
423 f.close()
424
Tim Peters661e4922002-04-20 02:39:44 +0000425 if not (('-c','') in optlist):
Tim Peters52cfa332002-04-19 16:09:26 +0000426 f = openfile(arch + '.hhc')
427 print "Building Table of Content..."
428 do_content(library, version, f)
429 f.close()
430
Tim Peters661e4922002-04-20 02:39:44 +0000431 if not (('-k','') in optlist):
Tim Peters52cfa332002-04-19 16:09:26 +0000432 f = openfile(arch + '.hhk')
433 print "Building Index..."
434 do_index(library, f)
435 f.close()
436
# Run the converter when executed as a script (but not when imported);
# do_it() then takes its arguments from sys.argv.
if __name__ == '__main__':
    do_it()