blob: 675e40062b33773b13523b6503204f55f9aec28c [file] [log] [blame]
"""
    Makes the necessary files to convert from plain html of
    Python 1.5 and 1.5.x Documentation to
    Microsoft HTML Help format version 1.1
    Doesn't change the html docs themselves.

    by hernan.foffani@iname.com
    no copyright and no responsibilities.

    modified by Dale Nagata for Python 1.5.2

    Renamed from make_chm.py to prechm.py, and checked into the Python
    project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim
    and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the
    Python 2.2 docs, at <http://alldunn.com/python/>.
"""
Tim Peters52cfa332002-04-19 16:09:26 +000017
18import sys
19import os
Tim Peterse21095e2002-04-20 08:36:42 +000020from formatter import NullWriter, AbstractFormatter
21from htmllib import HTMLParser
Tim Peters52cfa332002-04-19 16:09:26 +000022import string
23import getopt
24
# Help text printed by usage().  It has to agree with the option handling
# in do_it(): -v is mandatory (there is no default version), and -p
# suppresses the stop list as well as the project file.
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] -v VERSION filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File) or filename.stp
        (full text search stop list)
    -v VERSION: makes help for the python VERSION docs (required);
        VERSION must be a key of supported_libraries, e.g. 2.2
'''
33
# Project file (*.hhp) template.  'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).
# The template is filled in with the % operator and a mapping that
# supplies the 'arch' and 'version' keys (see do_project()); the entries
# of the [FILES] section are written right after it.  The backslash at
# the end of the first [WINDOWS] line joins it with the next line, so
# the window definition ends up on a single line in the output.
project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation

[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x384e,[271,372,740,718],,,,,,,0

[FILES]
'''
58
# Start of the table of contents (.hhc) file.  Takes a single %s, the doc
# version (see do_content()).  It opens two <UL> levels that
# contents_footer closes again.
contents_header = '''
<OBJECT type="text/site properties">
 <param name="Window Styles" value="0x801227">
 <param name="ImageType" value="Folder">
</OBJECT>
<UL>
<LI> <OBJECT type="text/sitemap">
 <param name="Name" value="Python %s Docs">
 <param name="Local" value="./index.html">
 </OBJECT>
<UL>
'''

# End of the table of contents file: closes the two <UL> levels opened by
# contents_header.
contents_footer = '''
</UL></UL>
'''

# One table of contents entry.  Takes two %s values, in this order: the
# page the entry links to ("Local") and the text shown for it ("Name").
object_sitemap = '''
 <LI> <OBJECT type="text/sitemap">
 <param name="Local" value="%s">
 <param name="Name" value="%s">
 </OBJECT>
'''
82
Tim Peters4f109c12002-04-19 18:41:46 +000083
# List of words the full text search facility shouldn't index.  This
# becomes file ARCH.stp.  Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
# The layout of the string doesn't matter: do_it() splits it on
# whitespace, sorts the words and writes them one per line.
stop_list = '''
a an and
is
near
not
of
or
the
'''
99
# s is a string or None.  If None or empty, return None.  Else tack '.html'
# on to the end, unless it's already there.
def addhtml(s):
    if not s:
        # Every spelling of "no page" (None, '') is normalized to None, as
        # the comment above promises; callers only test the result for
        # truth.
        return None
    if s.endswith('.html'):
        return s
    return s + '.html'
107
# A "book" in HTMLHelp terms is a doc directory in Python terms; this
# class merely bundles what is known about one of them.
class Book:
    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        # Directory holding the book's pages, and the title shown for it.
        self.directory, self.title = directory, title
        # Page names are passed through addhtml(); the optional ones stay
        # None when they aren't given.
        self.firstpage, self.contentpage, self.indexpage = [
            addhtml(page) for page in (firstpage, contentpage, indexpage)]
118
# The books making up each supported version of the docs, keyed by the
# version string given with -v.
# Each Book is built from (Dir, Title, First page, Content page, Index page);
# the last two are optional.
supported_libraries = {
    '2.2':  ### Beta!!!  fix for actual release
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'contents'),
        Book('dist', 'Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut', 'Tutorial', 'tut', 'contents'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut', 'Tutorial', 'tut', 'node1'),
        Book('lib', 'Library Reference', 'lib', 'node1', 'node268'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'node1'),
        Book('api', 'Python/C API', 'api', 'node1', 'node48'),
    ],
}
198
# AlmostNullWriter produces no output of its own.  Its one job is to
# remember the text handed to send_flowing_data(), so that the words
# between an anchor begin/end pair can be fetched afterwards, e.g. for
# TOC entries.

class AlmostNullWriter(NullWriter):

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        text = data.strip()
        if not text:
            # a run of pure whitespace: nothing worth saving
            return
        self.saved.append(text)

    # Throw away everything saved so far.
    def saved_clear(self):
        self.saved = []

    # Hand back the saved pieces, glued together with single spaces.
    def saved_get(self):
        return ' '.join(self.saved)
221
222class HelpHtmlParser(HTMLParser):
223
224 def __init__(self, formatter, path, output):
225 HTMLParser.__init__(self, formatter)
226 self.path = path # relative path
227 self.ft = output # output file
228 self.indent = 0 # number of tabs for pretty printing of files
229 self.proc = False # True when actively processing, else False
230 # (headers, footers, etc)
Tim Peters52cfa332002-04-19 16:09:26 +0000231
Tim Peters661e4922002-04-20 02:39:44 +0000232 def begin_group(self):
Tim Peterse21095e2002-04-20 08:36:42 +0000233 self.indent += 1
234 self.proc = True
Tim Peters52cfa332002-04-19 16:09:26 +0000235
Tim Peterse21095e2002-04-20 08:36:42 +0000236 def finish_group(self):
237 self.indent -= 1
238 # stop processing when back to top level
239 self.proc = self.indent > 0
Tim Peters52cfa332002-04-19 16:09:26 +0000240
Tim Peters661e4922002-04-20 02:39:44 +0000241 def anchor_bgn(self, href, name, type):
242 if self.proc:
Tim Peterse21095e2002-04-20 08:36:42 +0000243 self.saved_clear()
244 self.write('<OBJECT type="text/sitemap">\n')
245 self.tab('\t<param name="Local" value="%s/%s">\n' %
246 (self.path, href))
Tim Peters52cfa332002-04-19 16:09:26 +0000247
Tim Peters661e4922002-04-20 02:39:44 +0000248 def anchor_end(self):
249 if self.proc:
Tim Peterse21095e2002-04-20 08:36:42 +0000250 self.tab('\t<param name="Name" value="%s">\n' % self.saved_get())
251 self.tab('\t</OBJECT>\n')
Tim Peters52cfa332002-04-19 16:09:26 +0000252
Tim Peters661e4922002-04-20 02:39:44 +0000253 def start_dl(self, atr_val):
Tim Peters52cfa332002-04-19 16:09:26 +0000254 self.begin_group()
255
Tim Peters661e4922002-04-20 02:39:44 +0000256 def end_dl(self):
Tim Peterse21095e2002-04-20 08:36:42 +0000257 self.finish_group()
Tim Peters52cfa332002-04-19 16:09:26 +0000258
Tim Peters661e4922002-04-20 02:39:44 +0000259 def do_dt(self, atr_val):
Tim Peterse21095e2002-04-20 08:36:42 +0000260 # no trailing newline on purpose!
261 self.tab("<LI>")
Tim Peters52cfa332002-04-19 16:09:26 +0000262
Tim Peterse21095e2002-04-20 08:36:42 +0000263 # Write text to output file.
264 def write(self, text):
265 self.ft.write(text)
266
267 # Write text to output file after indenting by self.indent tabs.
268 def tab(self, text=''):
269 self.write('\t' * self.indent)
270 if text:
271 self.write(text)
272
273 # Forget all saved text.
274 def saved_clear(self):
275 self.formatter.writer.saved_clear()
276
277 # Return all saved text as a string.
278 def saved_get(self):
279 return self.formatter.writer.saved_get()
Tim Peters52cfa332002-04-19 16:09:26 +0000280
class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser for index pages; the parent class already does all it needs."""
284
# Parser for contents pages: both <DL> and <UL> groups come out as a
# nested <UL>, and every <LI> starts a new entry.

class TocHlpHtmlParser(HelpHtmlParser):

    def start_ul(self, atr_val):
        self.begin_group()
        self.tab('<UL>\n')

    def end_ul(self):
        self.finish_group()
        self.tab('</UL>\n')

    # <DL> groups are treated exactly like <UL> groups.
    start_dl = start_ul
    end_dl = end_ul

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
Tim Peters52cfa332002-04-19 16:09:26 +0000306
# Run the index page of one book through an IdxHlpHtmlParser, which
# writes the matching sitemap entries to 'output'.
#   path      -- the book's directory; also put in front of every link
#   indexpage -- file name of the index page inside that directory
#   output    -- open file-like object the generated text is written to
def index(path, indexpage, output):
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    try:
        # try/finally: the page gets closed even if parsing raises
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000314
# Run the contents page of one book through a TocHlpHtmlParser, which
# writes the matching sitemap entries to 'output'.
#   path        -- the book's directory; also put in front of every link
#   contentpage -- file name of the contents page inside that directory
#   output      -- open file-like object the generated text is written to
def content(path, contentpage, output):
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    try:
        # try/finally: the page gets closed even if parsing raises
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
Tim Peters52cfa332002-04-19 16:09:26 +0000322
# Write the index (.hhk) body: one <UL> holding the entries of every
# book in 'library' that has an index page.  Progress goes to stdout.
def do_index(library, output):
    output.write('<UL>\n')
    for book in library:
        page = book.indexpage
        # same text the print statement produced: tab, title, " - ", page
        sys.stdout.write('\t%s - %s\n' % (book.title, page))
        if page:
            index(book.directory, page, output)
    output.write('</UL>\n')
330
# Write the table of contents (.hhc) body: the header for 'version', then
# for each book its own entry followed by the entries found on its
# contents page (if it has one), then the footer.  Progress goes to stdout.
def do_content(library, version, output):
    emit = output.write
    emit(contents_header % version)
    for book in library:
        # same text the print statement produced: tab, title, " - ", page
        sys.stdout.write('\t%s - %s\n' % (book.title, book.firstpage))
        local = book.directory + "/" + book.firstpage
        emit(object_sitemap % (local, book.title))
        if book.contentpage:
            content(book.directory, book.contentpage, output)
    emit(contents_footer)
340
# Write the project (.hhp) file: the filled-in template, then the
# [FILES] section listing every .html and .css file of every book.
#   library -- the list of Book objects from supported_libraries for the
#              version of the docs getting generated
#   output  -- open file-like object to write to
#   arch    -- file basename, substituted into the template
#   version -- doc version number, substituted into the template
def do_project(library, output, arch, version):
    # Name the template keys explicitly instead of passing locals().
    output.write(project_template % {'arch': arch, 'version': version})
    for book in library:
        directory = book.directory
        pages = os.listdir(directory)
        # os.listdir() returns names in arbitrary order; sort them so
        # that rerunning the script produces the same project file.
        pages.sort()
        for page in pages:
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: a '%' in the directory name must
                # not be mistaken for a format directive.
                output.write(directory + '\\' + page + '\n')
Tim Peters52cfa332002-04-19 16:09:26 +0000352
# Open 'file' for writing and return the file object.  If that fails
# with IOError, print "file : reason" and leave with exit status 1.
def openfile(file):
    try:
        return open(file, "w")
    except IOError:
        # sys.exc_info() yields the exception instance without needing
        # any version-specific "except" syntax
        msg = sys.exc_info()[1]
        sys.stdout.write('%s : %s\n' % (file, msg))
        sys.exit(1)
360
# Show the usage message on stdout, then end the program (exit status 0).
def usage():
    sys.stdout.write(usage_mode + '\n')
    sys.exit(0)
364
# Command-line driver.
#   args -- list of command-line arguments, program name not included;
#           sys.argv[1:] is used when args is None or empty.
# Writes ARCH.stp and ARCH.hhp (unless -p), ARCH.hhc (unless -c) and
# ARCH.hhk (unless -k), where ARCH is the single positional argument.
# Any command-line problem ends in usage(), which exits the program.
def do_it(args = None):
    if not args:
        args = sys.argv[1:]

    if not args:
        usage()

    try:
        optlist, args = getopt.getopt(args, 'ckpv:')
    except getopt.error:
        # sys.exc_info() yields the exception instance without needing
        # any version-specific "except" syntax
        sys.stdout.write('%s\n' % sys.exc_info()[1])
        usage()

    # Exactly one positional argument: the basename of the output files.
    if len(args) != 1:
        usage()
    arch = args[0]

    # The first -v option wins; there is no default version.
    version = None
    for name, value in optlist:
        if name == '-v':
            version = value
            break
    if not version:
        usage()

    # An unknown version used to die with a bare KeyError traceback;
    # report it like any other command-line error instead.
    if version not in supported_libraries:
        known = list(supported_libraries.keys())
        known.sort()
        sys.stdout.write('unsupported version %s; supported versions: %s\n'
                         % (version, ', '.join(known)))
        usage()
    library = supported_libraries[version]

    # Every output file is closed in a finally clause, so a failing build
    # step doesn't leave an open file behind.
    if ('-p', '') not in optlist:
        fname = arch + '.stp'
        f = openfile(fname)
        sys.stdout.write('Building stoplist %s ...\n' % fname)
        try:
            words = stop_list.split()
            words.sort()
            for word in words:
                f.write(word + '\n')
        finally:
            f.close()

        f = openfile(arch + '.hhp')
        sys.stdout.write('Building Project...\n')
        try:
            do_project(library, f, arch, version)
            if version == '2.0.0':
                # for this version the files in 'icons' are listed too
                for image in os.listdir('icons'):
                    f.write('icons' + '\\' + image + '\n')
        finally:
            f.close()

    if ('-c', '') not in optlist:
        f = openfile(arch + '.hhc')
        sys.stdout.write('Building Table of Content...\n')
        try:
            do_content(library, version, f)
        finally:
            f.close()

    if ('-k', '') not in optlist:
        f = openfile(arch + '.hhk')
        sys.stdout.write('Building Index...\n')
        try:
            do_index(library, f)
        finally:
            f.close()
422
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    do_it()