blob: 59a2d2101d45487c3dfdc0ef6a3a9830876d5a8c [file] [log] [blame]
"""
    Makes the necessary files to convert from plain html of
    Python 1.5 and 1.5.x Documentation to
    Microsoft HTML Help format version 1.1
    Doesn't change the html's docs.

    by hernan.foffani@iname.com
    no copyright and no responsibilities.

    modified by Dale Nagata for Python 1.5.2

    Renamed from make_chm.py to prechm.py, and checked into the Python
    project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim
    and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the
    Python 2.2 docs, at <http://alldunn.com/python/>.
"""
Tim Peters52cfa332002-04-19 16:09:26 +000017
18import sys
19import os
Tim Peterse21095e2002-04-20 08:36:42 +000020from formatter import NullWriter, AbstractFormatter
21from htmllib import HTMLParser
Tim Peters52cfa332002-04-19 16:09:26 +000022import getopt
Tim Peters45454072002-04-20 20:26:26 +000023import cgi
Tim Peters52cfa332002-04-19 16:09:26 +000024
# Command-line help text printed by usage().
# The script is named prechm.py (it was renamed from make_chm.py), the -v
# option is mandatory (do_it() shows this text and exits when it is
# missing), and -p suppresses the stop-list file along with the project
# file.
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] -v version filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File) or filename.stp
    -v version: makes help for that version of the python docs
        (required; one of the supported versions, e.g. 1.5.2)
'''
33
# Project file (*.hhp) template.  'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).  Both are substituted by %-formatting with a
# mapping that supplies the keys 'arch' and 'version'.
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).  That line is split
# below with a backslash-newline, which Python drops from the string, so
# it ends up as a single physical line in the generated file.
# The template stops right after the [FILES] header; the list of files is
# appended separately by do_project().
project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation

[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x384e,[271,372,740,718],,,,,,,0

[FILES]
'''
58
# Opening part of the table-of-contents (*.hhc) file.  The single %s is
# replaced with the doc version number when do_content() writes it.
# The backslash right after the opening quotes keeps the string from
# starting with a blank line.  The header leaves two <UL> elements open;
# contents_footer closes them.
contents_header = '''\
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
 <param name="Window Styles" value="0x801227">
 <param name="ImageType" value="Folder">
</OBJECT>
<UL>
<LI><OBJECT type="text/sitemap">
 <param name="Name" value="Python %s Docs">
 <param name="Local" value="./index.html">
 </OBJECT>
<UL>
'''
77
# Closing part of the table-of-contents (*.hhc) file: closes the two <UL>
# elements left open by contents_header, then the body and the document.
contents_footer = '''\
</UL></UL></BODY></HTML>
'''
81
# Template for one sitemap entry.  It is %-formatted with a 2-tuple:
# (text for the "Name" param, path for the "Local" param).
object_sitemap = '''\
<OBJECT type="text/sitemap">
 <param name="Name" value="%s">
 <param name="Local" value="%s">
</OBJECT>
'''
88
# List of words the full text search facility shouldn't index.  This
# becomes file ARCH.stp.  Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
# The layout of the string does not matter: do_it() splits it on
# whitespace, sorts the words and writes them one per line.
stop_list = '''
a an and
is
near
not
of
or
the
'''
104
def addhtml(s):
    """Return s with an '.html' suffix, adding it only when it is missing.

    s is a string or None.  A false value (None or the empty string) is
    handed back unchanged, so callers can keep testing the result for
    truth to find out whether a page was given at all.
    """
    if not s:
        return s
    if s.endswith('.html'):
        return s
    return s + '.html'
112
class Book:
    """Description of "a book" in HTML Help terms == a doc directory in
    Python terms.

    directory   -- directory holding the book's pages
    title       -- title of the book
    firstpage   -- page name of the book's first page
    contentpage -- page name of the book's table of contents, or None
    indexpage   -- page name of the book's index, or None

    Page names may be given with or without a trailing '.html'; they are
    stored after being passed through addhtml().
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        pages = [addhtml(page)
                 for page in (firstpage, contentpage, indexpage)]
        self.firstpage, self.contentpage, self.indexpage = pages
123
# Library Doc list of books:
# maps a doc version string (the value given to the -v option) to the
# list of Book objects making up that version of the docs.
# each 'book' : Book(Dir, Title, First page, Content page, Index page)
# Content page and Index page are optional; page names are given here
# without the '.html' suffix, which Book adds.
supported_libraries = {
    '2.2':  ### Beta!!!  fix for actual release
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'contents'),
        Book('dist','Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents')
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut','Tutorial','tut','contents'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex')
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut','Tutorial','tut','node1'),
        Book('lib','Library Reference','lib','node1','node268'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','node1'),
        Book('api','Python/C API','api','node1','node48')
    ]
}
203
class AlmostNullWriter(NullWriter):
    """Writer that outputs nothing but remembers flowing text.

    Each non-blank piece of text passed to send_flowing_data() is kept,
    stripped of surrounding whitespace, in the list self.saved.  This is
    used to capture the text between an anchor begin/end pair, e.g. for
    TOC entries.
    """

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Remember data (stripped) unless it is only whitespace."""
        text = data.strip()
        if not text:
            # don't bother to save runs of whitespace
            return
        self.saved.append(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.saved = []

    def saved_get(self):
        """Return all saved text as one string, pieces joined by a space."""
        separator = ' '
        return separator.join(self.saved)
226
227class HelpHtmlParser(HTMLParser):
228
229 def __init__(self, formatter, path, output):
230 HTMLParser.__init__(self, formatter)
231 self.path = path # relative path
232 self.ft = output # output file
233 self.indent = 0 # number of tabs for pretty printing of files
234 self.proc = False # True when actively processing, else False
235 # (headers, footers, etc)
Tim Peters45454072002-04-20 20:26:26 +0000236 # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
237 # XXX See SF bug <http://www.python.org/sf/546579>.
238 self.hrefstack = [] # stack of hrefs from anchor begins
Tim Peters52cfa332002-04-19 16:09:26 +0000239
Tim Peters661e4922002-04-20 02:39:44 +0000240 def begin_group(self):
Tim Peterse21095e2002-04-20 08:36:42 +0000241 self.indent += 1
242 self.proc = True
Tim Peters52cfa332002-04-19 16:09:26 +0000243
Tim Peterse21095e2002-04-20 08:36:42 +0000244 def finish_group(self):
245 self.indent -= 1
246 # stop processing when back to top level
247 self.proc = self.indent > 0
Tim Peters52cfa332002-04-19 16:09:26 +0000248
Tim Peters661e4922002-04-20 02:39:44 +0000249 def anchor_bgn(self, href, name, type):
250 if self.proc:
Tim Peterse21095e2002-04-20 08:36:42 +0000251 self.saved_clear()
Tim Peters45454072002-04-20 20:26:26 +0000252 self.hrefstack.append(href)
Tim Peters52cfa332002-04-19 16:09:26 +0000253
Tim Peters661e4922002-04-20 02:39:44 +0000254 def anchor_end(self):
255 if self.proc:
Tim Peters45454072002-04-20 20:26:26 +0000256 title = cgi.escape(self.saved_get(), True)
257 path = self.path + '/' + self.hrefstack.pop()
258 # XXX See SF bug <http://www.python.org/sf/546579>.
259 # XXX index.html for the 2.2 language reference manual contains
260 # XXX nested <a></a> tags in the entry for the section on blank
261 # XXX lines. We want to ignore the nested part completely.
262 if len(self.hrefstack) == 0:
263 self.tab(object_sitemap % (title, path))
Tim Peters52cfa332002-04-19 16:09:26 +0000264
Tim Peters661e4922002-04-20 02:39:44 +0000265 def start_dl(self, atr_val):
Tim Peters52cfa332002-04-19 16:09:26 +0000266 self.begin_group()
267
Tim Peters661e4922002-04-20 02:39:44 +0000268 def end_dl(self):
Tim Peterse21095e2002-04-20 08:36:42 +0000269 self.finish_group()
Tim Peters52cfa332002-04-19 16:09:26 +0000270
Tim Peters661e4922002-04-20 02:39:44 +0000271 def do_dt(self, atr_val):
Tim Peterse21095e2002-04-20 08:36:42 +0000272 # no trailing newline on purpose!
273 self.tab("<LI>")
Tim Peters52cfa332002-04-19 16:09:26 +0000274
Tim Peterse21095e2002-04-20 08:36:42 +0000275 # Write text to output file.
276 def write(self, text):
277 self.ft.write(text)
278
279 # Write text to output file after indenting by self.indent tabs.
280 def tab(self, text=''):
281 self.write('\t' * self.indent)
282 if text:
283 self.write(text)
284
285 # Forget all saved text.
286 def saved_clear(self):
287 self.formatter.writer.saved_clear()
288
289 # Return all saved text as a string.
290 def saved_get(self):
291 return self.formatter.writer.saved_get()
Tim Peters52cfa332002-04-19 16:09:26 +0000292
class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser for index pages.

    Nothing special here, the parent class seems to be enough.
    """
296
class TocHlpHtmlParser(HelpHtmlParser):
    """Parser for table-of-contents pages.

    Both <DL> and <UL> elements of the input open a group and become a
    <UL> in the output; every <LI> of the input starts a list item.
    """

    def _open_list(self):
        # Enter the group first, so the <UL> is indented at the new depth.
        self.begin_group()
        self.tab('<UL>\n')

    def _close_list(self):
        # Leave the group first, so the </UL> is indented at the outer depth.
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
Tim Peters52cfa332002-04-19 16:09:26 +0000318
def index(path, indexpage, output):
    """Convert one book's index page into sitemap entries.

    path is the book's directory, indexpage the file name of its index
    page inside that directory, and output the file object the entries
    are written to.  Errors from opening or reading the page propagate to
    the caller.
    """
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    try:
        data = f.read()
    finally:
        # Close the page even when reading fails.
        f.close()
    parser.feed(data)
    parser.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000326
def content(path, contentpage, output):
    """Convert one book's contents page into table-of-contents entries.

    path is the book's directory, contentpage the file name of its
    contents page inside that directory, and output the file object the
    entries are written to.  Errors from opening or reading the page
    propagate to the caller.
    """
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    try:
        data = f.read()
    finally:
        # Close the page even when reading fails.
        f.close()
    parser.feed(data)
    parser.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000334
def do_index(library, output):
    """Write the index (*.hhk) body for every book in library to output.

    library is a sequence of Book objects.  One progress line per book is
    printed on standard output; books without an index page contribute
    nothing to the file.
    """
    emit = output.write
    emit('<UL>\n')
    for book in library:
        page = book.indexpage
        print('\t%s - %s' % (book.title, page))
        if not page:
            continue
        index(book.directory, page, output)
    emit('</UL>\n')
342
def do_content(library, version, output):
    """Write the complete table of contents (*.hhc) to output.

    library is a sequence of Book objects and version the doc version
    string put into the header.  Every book gets a top-level entry that
    points at its first page; if the book has a contents page, the
    entries extracted from that page follow.  One progress line per book
    is printed on standard output.
    """
    emit = output.write
    emit(contents_header % version)
    for book in library:
        print('\t%s - %s' % (book.title, book.firstpage))
        path = book.directory + "/" + book.firstpage
        emit('<LI>')
        emit(object_sitemap % (book.title, path))
        toc_page = book.contentpage
        if toc_page:
            content(book.directory, toc_page, output)
    emit(contents_footer)
353
def do_project(library, output, arch, version):
    """Write the project (*.hhp) file to output.

    First the project template is written, with arch (the file basename)
    and version (the doc version number) filled in.  Then the [FILES]
    section is filled in: for every book in library, each .html and .css
    file found in the book's directory is listed as directory\\file.

    library is the list of Book objects from supported_libraries for the
    version of the docs getting generated.
    """
    # Name the template's keys explicitly instead of passing locals().
    output.write(project_template % {'arch': arch, 'version': version})
    for book in library:
        directory = book.directory
        # os.listdir() returns names in arbitrary order; sort them so the
        # generated file is the same from run to run.
        pages = os.listdir(directory)
        pages.sort()
        for page in pages:
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: the directory name must not be
                # interpreted as part of a format string.
                output.write(directory + '\\' + page + '\n')
Tim Peters52cfa332002-04-19 16:09:26 +0000365
def openfile(file):
    """Open file for writing and return the file object.

    If the file cannot be opened (IOError), print the file name and the
    error, then exit the program with status 1.
    """
    try:
        return open(file, "w")
    except IOError:
        msg = sys.exc_info()[1]
        print('%s : %s' % (file, msg))
        sys.exit(1)
373
def usage():
    """Print the usage text (usage_mode) and exit with status 0."""
    print(usage_mode)
    raise SystemExit(0)
377
def do_it(args = None):
    """Parse the command line and build the requested HTML Help files.

    args is the list of command-line arguments without the program name;
    if it is None or empty, sys.argv[1:] is used.  Exactly one positional
    argument, the basename ARCH of the files to generate, and the option
    -v VERSION are required; otherwise the usage text is printed and the
    program exits.  An unsupported VERSION is reported and the program
    exits with status 1.

    Unless switched off by an option, the following files are written:
      ARCH.stp and ARCH.hhp  (stop list and project file; off with -p)
      ARCH.hhc               (table of contents; off with -c)
      ARCH.hhk               (index; off with -k)
    """
    if not args:
        args = sys.argv[1:]

    if not args:
        usage()

    try:
        optlist, args = getopt.getopt(args, 'ckpv:')
    except getopt.error:
        print(sys.exc_info()[1])
        usage()

    # Exactly one positional argument is accepted.
    if len(args) != 1:
        usage()
    arch = args[0]

    # The first -v option given wins.
    version = None
    for opt, value in optlist:
        if opt == '-v':
            version = value
            break
    if not version:
        usage()

    try:
        library = supported_libraries[version]
    except KeyError:
        # Report an unknown version instead of dying with a traceback.
        known = list(supported_libraries.keys())
        known.sort()
        print('Unsupported version: %s' % version)
        print('Supported versions: %s' % ' '.join(known))
        sys.exit(1)

    given = [opt for opt, value in optlist]

    if '-p' not in given:
        fname = arch + '.stp'
        f = openfile(fname)
        try:
            print('Building stoplist %s ...' % fname)
            words = stop_list.split()
            words.sort()
            for word in words:
                f.write(word + '\n')
        finally:
            f.close()

        f = openfile(arch + '.hhp')
        try:
            print('Building Project...')
            do_project(library, f, arch, version)
            # For 2.0.0 only, every file in the 'icons' directory is
            # listed in the project as well.
            if version == '2.0.0':
                for image in os.listdir('icons'):
                    f.write('icons' + '\\' + image + '\n')
        finally:
            f.close()

    if '-c' not in given:
        f = openfile(arch + '.hhc')
        try:
            print('Building Table of Content...')
            do_content(library, version, f)
        finally:
            f.close()

    if '-k' not in given:
        f = openfile(arch + '.hhk')
        try:
            print('Building Index...')
            do_index(library, f)
        finally:
            f.close()
435
# Run the converter on the command-line arguments when this file is
# executed as a script; importing it as a module does nothing.
if __name__ == '__main__':
    do_it()