| #! /usr/bin/env python | 
 |  | 
 | __version__ = '$Revision$' | 
 |  | 
 | import os | 
 | import re | 
 | import string | 
 | import sys | 
 |  | 
 |  | 
 | class Node: | 
 |     __rmjunk = re.compile("<#\d+#>") | 
 |  | 
 |     continuation = 0 | 
 |  | 
 |     def __init__(self, link, str, seqno): | 
 |         self.links = [link] | 
 |         self.seqno = seqno | 
 |         # remove <#\d+#> left in by moving the data out of LaTeX2HTML | 
 |         str = self.__rmjunk.sub('', str) | 
 |         # build up the text | 
 |         self.text = split_entry_text(str) | 
 |         self.key = split_entry_key(str) | 
 |  | 
 |     def __cmp__(self, other): | 
 |         """Comparison operator includes sequence number, for use with | 
 |         list.sort().""" | 
 |         return self.cmp_entry(other) or cmp(self.seqno, other.seqno) | 
 |  | 
 |     def cmp_entry(self, other): | 
 |         """Comparison 'operator' that ignores sequence number.""" | 
 |         c = 0 | 
 |         for i in range(min(len(self.key), len(other.key))): | 
 |             c = (cmp_part(self.key[i], other.key[i]) | 
 |                  or cmp_part(self.text[i], other.text[i])) | 
 |             if c: | 
 |                 break | 
 |         return c or cmp(self.key, other.key) or cmp(self.text, other.text) | 
 |  | 
 |     def __repr__(self): | 
 |         return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno) | 
 |  | 
 |     def __str__(self): | 
 |         return string.join(self.key, '!') | 
 |  | 
 |     def dump(self): | 
 |         return "%s\1%s###%s\n" \ | 
 |                % (string.join(self.links, "\1"), | 
 |                   string.join(self.text, '!'), | 
 |                   self.seqno) | 
 |  | 
 |  | 
 | def cmp_part(s1, s2): | 
 |     result = cmp(s1, s2) | 
 |     if result == 0: | 
 |         return 0 | 
 |     l1 = string.lower(s1) | 
 |     l2 = string.lower(s2) | 
 |     minlen = min(len(s1), len(s2)) | 
 |     if len(s1) < len(s2) and l1 == l2[:len(s1)]: | 
 |         result = -1 | 
 |     elif len(s2) < len(s1) and l2 == l1[:len(s2)]: | 
 |         result = 1 | 
 |     else: | 
 |         result = cmp(l1, l2) or cmp(s1, s2) | 
 |     return result | 
 |  | 
 |  | 
 | def split_entry(str, which): | 
 |     stuff = [] | 
 |     parts = string.split(str, '!') | 
 |     parts = map(string.split, parts, ['@'] * len(parts)) | 
 |     for entry in parts: | 
 |         if len(entry) != 1: | 
 |             key = entry[which] | 
 |         else: | 
 |             key = entry[0] | 
 |         stuff.append(key) | 
 |     return stuff | 
 |  | 
 |  | 
# Matches text containing a <tt>...</tt> element (optionally carrying a
# class attribute).  Groups 1, 2 and 3 are the text before, inside and
# after the element, so joining them drops the markup.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches a literal "()" so that it can be stripped from sort keys.
_rmparens = re.compile(r"\(\)")
 |  | 
 | def split_entry_key(str): | 
 |     parts = split_entry(str, 1) | 
 |     for i in range(len(parts)): | 
 |         m = _rmtt.match(parts[i]) | 
 |         if m: | 
 |             parts[i] = string.join(m.group(1, 2, 3), '') | 
 |         else: | 
 |             parts[i] = string.lower(parts[i]) | 
 |         # remove '()' from the key: | 
 |         parts[i] = _rmparens.sub('', parts[i]) | 
 |     return map(trim_ignored_letters, parts) | 
 |  | 
 |  | 
 | def split_entry_text(str): | 
 |     if '<' in str: | 
 |         m = _rmtt.match(str) | 
 |         if m: | 
 |             str = string.join(m.group(1, 2, 3), '') | 
 |     return split_entry(str, 1) | 
 |  | 
 |  | 
 | def load(fp): | 
 |     nodes = [] | 
 |     rx = re.compile("(.*)\1(.*)###(.*)$") | 
 |     while 1: | 
 |         line = fp.readline() | 
 |         if not line: | 
 |             break | 
 |         m = rx.match(line) | 
 |         if m: | 
 |             link, str, seqno = m.group(1, 2, 3) | 
 |             nodes.append(Node(link, str, seqno)) | 
 |     return nodes | 
 |  | 
 |  | 
 | def trim_ignored_letters(s): | 
 |     # ignore $ to keep environment variables with the | 
 |     # leading letter from the name | 
 |     s = string.lower(s) | 
 |     if s[0] == "$": | 
 |         return s[1:] | 
 |     else: | 
 |         return s | 
 |  | 
 | def get_first_letter(s): | 
 |     return string.lower(trim_ignored_letters(s)[0]) | 
 |  | 
 |  | 
 | def split_letters(nodes): | 
 |     letter_groups = [] | 
 |     if nodes: | 
 |         group = [] | 
 |         append = group.append | 
 |         letter = get_first_letter(nodes[0].text[0]) | 
 |         letter_groups.append((letter, group)) | 
 |         for node in nodes: | 
 |             nletter = get_first_letter(node.text[0]) | 
 |             if letter != nletter: | 
 |                 letter = nletter | 
 |                 group = [] | 
 |                 letter_groups.append((letter, group)) | 
 |                 append = group.append | 
 |             append(node) | 
 |     return letter_groups | 
 |  | 
 |  | 
 | # need a function to separate the nodes into columns... | 
 | def split_columns(nodes, columns=1): | 
 |     if columns <= 1: | 
 |         return [nodes] | 
 |     # This is a rough height; we may have to increase to avoid breaks before | 
 |     # a subitem. | 
 |     colheight = len(nodes) / columns | 
 |     numlong = len(nodes) % columns | 
 |     if numlong: | 
 |         colheight = colheight + 1 | 
 |     else: | 
 |         numlong = columns | 
 |     cols = [] | 
 |     for i in range(numlong): | 
 |         start = i * colheight | 
 |         end = start + colheight | 
 |         cols.append(nodes[start:end]) | 
 |     del nodes[:end] | 
 |     colheight = colheight - 1 | 
 |     try: | 
 |         numshort = len(nodes) / colheight | 
 |     except ZeroDivisionError: | 
 |         cols = cols + (columns - len(cols)) * [[]] | 
 |     else: | 
 |         for i in range(numshort): | 
 |             start = i * colheight | 
 |             end = start + colheight | 
 |             cols.append(nodes[start:end]) | 
 |     # | 
 |     # If items continue across columns, make sure they are marked | 
 |     # as continuations so the user knows to look at the previous column. | 
 |     # | 
 |     for i in range(len(cols) - 1): | 
 |         try: | 
 |             prev = cols[i][-1] | 
 |             next = cols[i + 1][0] | 
 |         except IndexError: | 
 |             return cols | 
 |         else: | 
 |             n = min(len(prev.key), len(next.key)) | 
 |             for j in range(n): | 
 |                 if prev.key[j] != next.key[j]: | 
 |                     break | 
 |                 next.continuation = j + 1 | 
 |     return cols | 
 |  | 
 |  | 
 | DL_LEVEL_INDENT = "  " | 
 |  | 
 | def format_column(nodes): | 
 |     strings = ["<dl compact>"] | 
 |     append = strings.append | 
 |     level = 0 | 
 |     previous = [] | 
 |     for node in nodes: | 
 |         current = node.text | 
 |         count = 0 | 
 |         for i in range(min(len(current), len(previous))): | 
 |             if previous[i] != current[i]: | 
 |                 break | 
 |             count = i + 1 | 
 |         if count > level: | 
 |             append("<dl compact>" * (count - level) + "\n") | 
 |             level = count | 
 |         elif level > count: | 
 |             append("\n") | 
 |             append(level * DL_LEVEL_INDENT) | 
 |             append("</dl>" * (level - count)) | 
 |             level = count | 
 |         # else: level == count | 
 |         for i in range(count, len(current) - 1): | 
 |             term = node.text[i] | 
 |             level = level + 1 | 
 |             if node.continuation > i: | 
 |                 extra = " (continued)" | 
 |             else: | 
 |                 extra = "" | 
 |             append("\n<dt>%s%s\n<dd>\n%s<dl compact>" | 
 |                    % (term, extra, level * DL_LEVEL_INDENT)) | 
 |         append("\n%s<dt>%s%s</a>" | 
 |                % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1])) | 
 |         for link in node.links[1:]: | 
 |             append(",\n%s    %s[Link]</a>" % (level * DL_LEVEL_INDENT, link)) | 
 |         previous = current | 
 |     append("\n") | 
 |     append("</dl>" * (level + 1)) | 
 |     return string.join(strings, '') | 
 |  | 
 |  | 
 | def format_nodes(nodes, columns=1): | 
 |     strings = [] | 
 |     append = strings.append | 
 |     if columns > 1: | 
 |         colnos = range(columns) | 
 |         colheight = len(nodes) / columns | 
 |         if len(nodes) % columns: | 
 |             colheight = colheight + 1 | 
 |         colwidth = 100 / columns | 
 |         append('<table width="100%"><tr valign="top">') | 
 |         for col in split_columns(nodes, columns): | 
 |             append('<td width="%d%%">\n' % colwidth) | 
 |             append(format_column(col)) | 
 |             append("\n</td>") | 
 |         append("\n</tr></table>") | 
 |     else: | 
 |         append(format_column(nodes)) | 
 |     append("\n<p>\n") | 
 |     return string.join(strings, '') | 
 |  | 
 |  | 
 | def format_letter(letter): | 
 |     if letter == '.': | 
 |         lettername = ". (dot)" | 
 |     elif letter == '_': | 
 |         lettername = "_ (underscore)" | 
 |     else: | 
 |         lettername = string.upper(letter) | 
 |     return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \ | 
 |            % (letter, lettername) | 
 |  | 
 |  | 
 | def format_html_letters(nodes, columns=1): | 
 |     letter_groups = split_letters(nodes) | 
 |     items = [] | 
 |     for letter, nodes in letter_groups: | 
 |         s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter) | 
 |         items.append(s) | 
 |     s = ["<hr><center>\n%s</center>\n" % string.join(items, " |\n")] | 
 |     for letter, nodes in letter_groups: | 
 |         s.append(format_letter(letter)) | 
 |         s.append(format_nodes(nodes, columns)) | 
 |     return string.join(s, '') | 
 |  | 
def format_html(nodes, columns):
    """Return the index HTML for *nodes* without per-letter sections.

    Simply delegates to format_nodes(nodes, columns).
    """
    return format_nodes(nodes, columns)
 |  | 
 |  | 
 | def collapse(nodes): | 
 |     """Collapse sequences of nodes with matching keys into a single node. | 
 |     Destructive.""" | 
 |     if len(nodes) < 2: | 
 |         return | 
 |     prev = nodes[0] | 
 |     i = 1 | 
 |     while i < len(nodes): | 
 |         node = nodes[i] | 
 |         if not node.cmp_entry(prev): | 
 |             prev.links.append(node.links[0]) | 
 |             del nodes[i] | 
 |         else: | 
 |             i = i + 1 | 
 |             prev = node | 
 |  | 
 |  | 
 | def dump(nodes, fp): | 
 |     for node in nodes: | 
 |         fp.write(node.dump()) | 
 |  | 
 |  | 
 | def process_nodes(nodes, columns, letters): | 
 |     nodes.sort() | 
 |     collapse(nodes) | 
 |     if letters: | 
 |         return format_html_letters(nodes, columns) | 
 |     else: | 
 |         return format_html(nodes, columns) | 
 |  | 
 |  | 
 | def main(): | 
 |     import getopt | 
 |     ifn = "-" | 
 |     ofn = "-" | 
 |     columns = 1 | 
 |     letters = 0 | 
 |     opts, args = getopt.getopt(sys.argv[1:], "c:lo:", | 
 |                                ["columns=", "letters", "output="]) | 
 |     for opt, val in opts: | 
 |         if opt in ("-o", "--output"): | 
 |             ofn = val | 
 |         elif opt in ("-c", "--columns"): | 
 |             columns = string.atoi(val) | 
 |         elif opt in ("-l", "--letters"): | 
 |             letters = 1 | 
 |     if not args: | 
 |         args = [ifn] | 
 |     nodes = [] | 
 |     for fn in args: | 
 |         nodes = nodes + load(open(fn)) | 
 |     num_nodes = len(nodes) | 
 |     html = process_nodes(nodes, columns, letters) | 
 |     program = os.path.basename(sys.argv[0]) | 
 |     if ofn == "-": | 
 |         sys.stdout.write(html) | 
 |         sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes)) | 
 |     else: | 
 |         open(ofn, "w").write(html) | 
 |         print | 
 |         print "%s: %d index nodes" % (program, num_nodes) | 
 |  | 
 |  | 
 | if __name__ == "__main__": | 
 |     main() |