| """ | 
 |     Makes the necessary files to convert from plain html of | 
 |     Python 1.5 and 1.5.x Documentation to | 
 |     Microsoft HTML Help format version 1.1 | 
 |     Doesn't change the html's docs. | 
 |  | 
 |     by hernan.foffani@iname.com | 
 |     no copyright and no responsibilities. | 
 |  | 
 |     modified by Dale Nagata for Python 1.5.2 | 
 |  | 
 |     Renamed from make_chm.py to prechm.py, and checked into the Python | 
 |     project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim | 
 |     and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the | 
 |     Python 2.2 docs, at <http://alldunn.com/python/>. | 
 | """ | 
 |  | 
 | import sys | 
 | import os | 
 | from formatter import NullWriter, AbstractFormatter | 
 | from htmllib import HTMLParser | 
 | import getopt | 
 | import cgi | 
 |  | 
 | usage_mode = ''' | 
 | Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename | 
 |     -c: does not build filename.hhc (Table of Contents) | 
 |     -k: does not build filename.hhk (Index) | 
 |     -p: does not build filename.hhp (Project File) | 
 |     -v 1.5[.x]: makes help for the python 1.5[.x] docs | 
 |         (default is python 1.5.2 docs) | 
 | ''' | 
 |  | 
# Project file (*.hhp) template.  'arch' is the file basename (like
# the pythlp in pythlp.hhp); 'version' is the doc version number (like
# the 2.2 in Python 2.2).  Both are substituted by name through the
# %(arch)s and %(version)s placeholders, so the template has to be
# formatted with a mapping that supplies those two keys.
# The magical numbers in the long line under [WINDOWS] set most of the
# user-visible features (visible buttons, tabs, etc).  That long line is
# written as two source lines; the backslash ending the first one makes
# Python join them into a single line of the resulting string.
# About 0x10384e:  This defines the buttons in the help viewer.  The
# following defns are taken from htmlhelp.h.  Not all possibilities
# actually work, and not all those that work are available from the Help
# Workshop GUI.  In particular, the Zoom/Font button works and is not
# available from the GUI.  The ones we're using are marked with 'x':
#
#    0x000002   Hide/Show   x
#    0x000004   Back        x
#    0x000008   Forward     x
#    0x000010   Stop
#    0x000020   Refresh
#    0x000040   Home        x
#    0x000080   Forward
#    0x000100   Back
#    0x000200   Notes
#    0x000400   Contents
#    0x000800   Locate      x
#    0x001000   Options     x
#    0x002000   Print       x
#    0x004000   Index
#    0x008000   Search
#    0x010000   History
#    0x020000   Favorites
#    0x040000   Jump 1
#    0x080000   Jump 2
#    0x100000   Zoom/Font   x
#    0x200000   TOC Next
#    0x400000   TOC Prev
#
# The marked flags add up to the value used in the template:
#    0x100000 + 0x2000 + 0x1000 + 0x800 + 0x40 + 0x8 + 0x4 + 0x2 == 0x10384e
# NOTE(review): "Forward" and "Back" each appear twice in the table above;
# presumably the 0x80/0x100 pair is a different kind of navigation button
# than the 0x4/0x8 pair -- confirm against htmlhelp.h.

project_template = '''
[OPTIONS]
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
Title=Python %(version)s Documentation

[WINDOWS]
%(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
"index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0

[FILES]
'''
 |  | 
 | contents_header = '''\ | 
 | <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN"> | 
 | <HTML> | 
 | <HEAD> | 
 | <meta name="GENERATOR" content="Microsoft® HTML Help Workshop 4.1"> | 
 | <!-- Sitemap 1.0 --> | 
 | </HEAD><BODY> | 
 | <OBJECT type="text/site properties"> | 
 |         <param name="Window Styles" value="0x801227"> | 
 |         <param name="ImageType" value="Folder"> | 
 | </OBJECT> | 
 | <UL> | 
 | ''' | 
 |  | 
# Text written at the very end of the table-of-contents (.hhc) file; it
# closes the <UL>, <BODY> and <HTML> elements opened by contents_header.
contents_footer = '''\
</UL></BODY></HTML>
'''
 |  | 
# Template for one sitemap entry.  It takes two positional %s values, in
# this order: the entry's display name and the path of the page it links to.
object_sitemap = '''\
<OBJECT type="text/sitemap">
    <param name="Name" value="%s">
    <param name="Local" value="%s">
</OBJECT>
'''
 |  | 
# List of words the full text search facility shouldn't index.  This
# becomes file ARCH.stp.  Note that this list must be pretty small!
# Different versions of the MS docs claim the file has a maximum size of
# 256 or 512 bytes (including \r\n at the end of each line).
# Note that "and", "or", "not" and "near" are operators in the search
# language, so no point indexing them even if we wanted to.
# The layout below is only for readability: do_it() splits this string on
# whitespace, sorts the words, and writes them one per line.
stop_list = '''
a  and  are  as  at
be  but  by
for
if  in  into  is  it
near  no  not
of  on  or
such
that  the  their  then  there  these  they  this  to
was  will  with
'''
 |  | 
 | # s is a string or None.  If None or empty, return None.  Else tack '.html' | 
 | # on to the end, unless it's already there. | 
 | def addhtml(s): | 
 |     if s: | 
 |         if not s.endswith('.html'): | 
 |             s += '.html' | 
 |     return s | 
 |  | 
 | # Convenience class to hold info about "a book" in HTMLHelp terms == a doc | 
 | # directory in Python terms. | 
 | class Book: | 
 |     def __init__(self, directory, title, firstpage, | 
 |                  contentpage=None, indexpage=None): | 
 |         self.directory   = directory | 
 |         self.title       = title | 
 |         self.firstpage   = addhtml(firstpage) | 
 |         self.contentpage = addhtml(contentpage) | 
 |         self.indexpage   = addhtml(indexpage) | 
 |  | 
# Library Doc list of books, keyed by documentation version string (the
# value do_it() receives through the -v option).  Each value is the list
# of Book objects making up that version of the docs:
# each 'book' : Book(Dir, Title, First page, Content page, Index page)
# Content page and Index page are optional; page names are given without
# the '.html' suffix, which Book adds.
supported_libraries = {
    '2.5':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'),
    ],

    '2.4':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'),
    ],

    '2.3':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.2':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1':
    [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'index'),
        Book('dist','Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0':
    [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents'),
        Book('inst','Installing Python Modules', 'inst', 'contents'),
        Book('dist','Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2':
    [
        Book('tut','Tutorial','tut','node2'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref','contents','genindex'),
        Book('mac','Macintosh Reference','mac','contents','genindex'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex'),
        Book('doc','Documenting Python','doc','contents')
    ],

    # library for 1.5.1 version:
    '1.5.1':
    [
        Book('tut','Tutorial','tut','contents'),
        Book('lib','Library Reference','lib','contents','genindex'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','contents'),
        Book('api','Python/C API','api','contents','genindex')
    ],

    # library for 1.5 version:
    '1.5':
    [
        Book('tut','Tutorial','tut','node1'),
        Book('lib','Library Reference','lib','node1','node268'),
        Book('ref','Language Reference','ref-1','ref-2','ref-11'),
        Book('ext','Extending and Embedding','ext','node1'),
        Book('api','Python/C API','api','node1','node48')
    ]
}
 |  | 
 | # AlmostNullWriter doesn't print anything; it just arranges to save the | 
 | # text sent to send_flowing_data().  This is used to capture the text | 
 | # between an anchor begin/end pair, e.g. for TOC entries. | 
 |  | 
 | class AlmostNullWriter(NullWriter): | 
 |  | 
 |     def __init__(self): | 
 |         NullWriter.__init__(self) | 
 |         self.saved_clear() | 
 |  | 
 |     def send_flowing_data(self, data): | 
 |         stripped = data.strip() | 
 |         if stripped:    # don't bother to save runs of whitespace | 
 |             self.saved.append(stripped) | 
 |  | 
 |     # Forget all saved text. | 
 |     def saved_clear(self): | 
 |         self.saved = [] | 
 |  | 
 |     # Return all saved text as a string. | 
 |     def saved_get(self): | 
 |         return ' '.join(self.saved) | 
 |  | 
 | class HelpHtmlParser(HTMLParser): | 
 |  | 
 |     def __init__(self, formatter, path, output): | 
 |         HTMLParser.__init__(self, formatter) | 
 |         self.path = path    # relative path | 
 |         self.ft = output    # output file | 
 |         self.indent = 0     # number of tabs for pretty printing of files | 
 |         self.proc = False   # True when actively processing, else False | 
 |                             # (headers, footers, etc) | 
 |         # XXX This shouldn't need to be a stack -- anchors shouldn't nest. | 
 |         # XXX See SF bug <http://www.python.org/sf/546579>. | 
 |         self.hrefstack = [] # stack of hrefs from anchor begins | 
 |  | 
 |     def begin_group(self): | 
 |         self.indent += 1 | 
 |         self.proc = True | 
 |  | 
 |     def finish_group(self): | 
 |         self.indent -= 1 | 
 |         # stop processing when back to top level | 
 |         self.proc = self.indent > 0 | 
 |  | 
 |     def anchor_bgn(self, href, name, type): | 
 |         if self.proc: | 
 |             # XXX See SF bug <http://www.python.org/sf/546579>. | 
 |             # XXX index.html for the 2.2.1 language reference manual contains | 
 |             # XXX nested <a></a> tags in the entry for the section on blank | 
 |             # XXX lines.  We want to ignore the nested part completely. | 
 |             if len(self.hrefstack) == 0: | 
 |                 self.saved_clear() | 
 |                 self.hrefstack.append(href) | 
 |  | 
 |     def anchor_end(self): | 
 |         if self.proc: | 
 |             # XXX See XXX above. | 
 |             if self.hrefstack: | 
 |                 title = cgi.escape(self.saved_get(), True) | 
 |                 path = self.path + '/' + self.hrefstack.pop() | 
 |                 self.tab(object_sitemap % (title, path)) | 
 |  | 
 |     def start_dl(self, atr_val): | 
 |         self.begin_group() | 
 |  | 
 |     def end_dl(self): | 
 |         self.finish_group() | 
 |  | 
 |     def do_dt(self, atr_val): | 
 |         # no trailing newline on purpose! | 
 |         self.tab("<LI>") | 
 |  | 
 |     # Write text to output file. | 
 |     def write(self, text): | 
 |         self.ft.write(text) | 
 |  | 
 |     # Write text to output file after indenting by self.indent tabs. | 
 |     def tab(self, text=''): | 
 |         self.write('\t' * self.indent) | 
 |         if text: | 
 |             self.write(text) | 
 |  | 
 |     # Forget all saved text. | 
 |     def saved_clear(self): | 
 |         self.formatter.writer.saved_clear() | 
 |  | 
 |     # Return all saved text as a string. | 
 |     def saved_get(self): | 
 |         return self.formatter.writer.saved_get() | 
 |  | 
 | class IdxHlpHtmlParser(HelpHtmlParser): | 
 |     # nothing special here, seems enough with parent class | 
 |     pass | 
 |  | 
 | class TocHlpHtmlParser(HelpHtmlParser): | 
 |  | 
 |     def start_dl(self, atr_val): | 
 |         self.begin_group() | 
 |         self.tab('<UL>\n') | 
 |  | 
 |     def end_dl(self): | 
 |         self.finish_group() | 
 |         self.tab('</UL>\n') | 
 |  | 
 |     def start_ul(self, atr_val): | 
 |         self.begin_group() | 
 |         self.tab('<UL>\n') | 
 |  | 
 |     def end_ul(self): | 
 |         self.finish_group() | 
 |         self.tab('</UL>\n') | 
 |  | 
 |     def do_li(self, atr_val): | 
 |         # no trailing newline on purpose! | 
 |         self.tab("<LI>") | 
 |  | 
 | def index(path, indexpage, output): | 
 |     parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()), | 
 |                               path, output) | 
 |     f = open(path + '/' + indexpage) | 
 |     parser.feed(f.read()) | 
 |     parser.close() | 
 |     f.close() | 
 |  | 
 | def content(path, contentpage, output): | 
 |     parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()), | 
 |                               path, output) | 
 |     f = open(path + '/' + contentpage) | 
 |     parser.feed(f.read()) | 
 |     parser.close() | 
 |     f.close() | 
 |  | 
 | def do_index(library, output): | 
 |     output.write('<UL>\n') | 
 |     for book in library: | 
 |         print '\t', book.title, '-', book.indexpage | 
 |         if book.indexpage: | 
 |             index(book.directory, book.indexpage, output) | 
 |     output.write('</UL>\n') | 
 |  | 
 | def do_content(library, version, output): | 
 |     output.write(contents_header) | 
 |     for book in library: | 
 |         print '\t', book.title, '-', book.firstpage | 
 |         path = book.directory + "/" + book.firstpage | 
 |         output.write('<LI>') | 
 |         output.write(object_sitemap % (book.title, path)) | 
 |         if book.contentpage: | 
 |             content(book.directory, book.contentpage, output) | 
 |     output.write(contents_footer) | 
 |  | 
 | # Fill in the [FILES] section of the project (.hhp) file. | 
 | # 'library' is the list of directory description tuples from | 
 | # supported_libraries for the version of the docs getting generated. | 
 | def do_project(library, output, arch, version): | 
 |     output.write(project_template % locals()) | 
 |     pathseen = {} | 
 |     for book in library: | 
 |         directory = book.directory | 
 |         path = directory + '\\%s\n' | 
 |         for page in os.listdir(directory): | 
 |             if page.endswith('.html') or page.endswith('.css'): | 
 |                 fullpath = path % page | 
 |                 if fullpath not in pathseen: | 
 |                     output.write(fullpath) | 
 |                     pathseen[fullpath] = True | 
 |  | 
 | def openfile(file): | 
 |     try: | 
 |         p = open(file, "w") | 
 |     except IOError, msg: | 
 |         print file, ":", msg | 
 |         sys.exit(1) | 
 |     return p | 
 |  | 
 | def usage(): | 
 |     print usage_mode | 
 |     sys.exit(0) | 
 |  | 
 | def do_it(args = None): | 
 |     if not args: | 
 |         args = sys.argv[1:] | 
 |  | 
 |     if not args: | 
 |         usage() | 
 |  | 
 |     try: | 
 |         optlist, args = getopt.getopt(args, 'ckpv:') | 
 |     except getopt.error, msg: | 
 |         print msg | 
 |         usage() | 
 |  | 
 |     if not args or len(args) > 1: | 
 |         usage() | 
 |     arch = args[0] | 
 |  | 
 |     version = None | 
 |     for opt in optlist: | 
 |         if opt[0] == '-v': | 
 |             version = opt[1] | 
 |             break | 
 |     if not version: | 
 |         usage() | 
 |  | 
 |     library = supported_libraries[version] | 
 |  | 
 |     if not (('-p','') in optlist): | 
 |         fname = arch + '.stp' | 
 |         f = openfile(fname) | 
 |         print "Building stoplist", fname, "..." | 
 |         words = stop_list.split() | 
 |         words.sort() | 
 |         for word in words: | 
 |             print >> f, word | 
 |         f.close() | 
 |  | 
 |         f = openfile(arch + '.hhp') | 
 |         print "Building Project..." | 
 |         do_project(library, f, arch, version) | 
 |         if version == '2.0.0': | 
 |             for image in os.listdir('icons'): | 
 |                 f.write('icons'+ '\\' + image + '\n') | 
 |  | 
 |         f.close() | 
 |  | 
 |     if not (('-c','') in optlist): | 
 |         f = openfile(arch + '.hhc') | 
 |         print "Building Table of Content..." | 
 |         do_content(library, version, f) | 
 |         f.close() | 
 |  | 
 |     if not (('-k','') in optlist): | 
 |         f = openfile(arch + '.hhk') | 
 |         print "Building Index..." | 
 |         do_index(library, f) | 
 |         f.close() | 
 |  | 
# Run the conversion only when this file is executed as a script; do_it()
# then takes its arguments from sys.argv.
if __name__ == '__main__':
    do_it()