| #! /usr/bin/env python |
| |
| __version__ = '$Revision$' |
| |
| import os.path |
| import re |
| import string |
| import sys |
| |
| from xml.sax.saxutils import quoteattr |
| |
| |
| bang_join = "!".join |
| null_join = "".join |
| |
| REPLACEMENTS = [ |
| # Hackish way to deal with macros replaced with simple text |
| (re.compile(r"\\ABC\b"), "ABC"), |
| (re.compile(r"\\ASCII\b"), "ASCII"), |
| (re.compile(r"\\Cpp\b"), "C++"), |
| (re.compile(r"\\EOF\b"), "EOF"), |
| (re.compile(r"\\NULL\b"), "NULL"), |
| (re.compile(r"\\POSIX\b"), "POSIX"), |
| (re.compile(r"\\UNIX\b"), "Unix"), |
| # deal with turds left over from LaTeX2HTML |
| (re.compile(r"<#\d+#>"), ""), |
| ] |
| |
| class Node: |
| continuation = 0 |
| |
| def __init__(self, link, str, seqno): |
| self.links = [link] |
| self.seqno = seqno |
| for pattern, replacement in REPLACEMENTS: |
| str = pattern.sub(replacement, str) |
| # build up the text |
| self.text = split_entry_text(str) |
| self.key = split_entry_key(str) |
| |
| def __cmp__(self, other): |
| """Comparison operator includes sequence number, for use with |
| list.sort().""" |
| return self.cmp_entry(other) or cmp(self.seqno, other.seqno) |
| |
| def cmp_entry(self, other): |
| """Comparison 'operator' that ignores sequence number.""" |
| c = 0 |
| for i in range(min(len(self.key), len(other.key))): |
| c = (cmp_part(self.key[i], other.key[i]) |
| or cmp_part(self.text[i], other.text[i])) |
| if c: |
| break |
| return c or cmp(self.key, other.key) or cmp(self.text, other.text) |
| |
| def __repr__(self): |
| return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno) |
| |
| def __str__(self): |
| return bang_join(self.key) |
| |
| def dump(self): |
| return "%s\1%s###%s\n" \ |
| % ("\1".join(self.links), |
| bang_join(self.text), |
| self.seqno) |
| |
| |
| def cmp_part(s1, s2): |
| result = cmp(s1, s2) |
| if result == 0: |
| return 0 |
| l1 = s1.lower() |
| l2 = s2.lower() |
| minlen = min(len(s1), len(s2)) |
| if len(s1) < len(s2) and l1 == l2[:len(s1)]: |
| result = -1 |
| elif len(s2) < len(s1) and l2 == l1[:len(s2)]: |
| result = 1 |
| else: |
| result = cmp(l1, l2) or cmp(s1, s2) |
| return result |
| |
| |
| def split_entry(str, which): |
| stuff = [] |
| parts = str.split('!') |
| parts = [part.split('@') for part in parts] |
| for entry in parts: |
| if len(entry) != 1: |
| key = entry[which] |
| else: |
| key = entry[0] |
| stuff.append(key) |
| return stuff |
| |
| |
| _rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""", |
| re.IGNORECASE) |
| _rmparens = re.compile(r"\(\)") |
| |
| def split_entry_key(str): |
| parts = split_entry(str, 1) |
| for i in range(len(parts)): |
| m = _rmtt.match(parts[i]) |
| if m: |
| parts[i] = null_join(m.group(1, 2, 3)) |
| else: |
| parts[i] = parts[i].lower() |
| # remove '()' from the key: |
| parts[i] = _rmparens.sub('', parts[i]) |
| return map(trim_ignored_letters, parts) |
| |
| |
| def split_entry_text(str): |
| if '<' in str: |
| m = _rmtt.match(str) |
| if m: |
| str = null_join(m.group(1, 2, 3)) |
| return split_entry(str, 1) |
| |
| |
| def load(fp): |
| nodes = [] |
| rx = re.compile("(.*)\1(.*)###(.*)$") |
| while 1: |
| line = fp.readline() |
| if not line: |
| break |
| m = rx.match(line) |
| if m: |
| link, str, seqno = m.group(1, 2, 3) |
| nodes.append(Node(link, str, seqno)) |
| return nodes |
| |
| |
| def trim_ignored_letters(s): |
| # ignore $ to keep environment variables with the |
| # leading letter from the name |
| if s.startswith("$"): |
| return s[1:].lower() |
| else: |
| return s.lower() |
| |
| def get_first_letter(s): |
| if s.startswith("<tex2html_percent_mark>"): |
| return "%" |
| else: |
| return trim_ignored_letters(s)[0] |
| |
| |
| def split_letters(nodes): |
| letter_groups = [] |
| if nodes: |
| group = [] |
| append = group.append |
| letter = get_first_letter(nodes[0].text[0]) |
| letter_groups.append((letter, group)) |
| for node in nodes: |
| nletter = get_first_letter(node.text[0]) |
| if letter != nletter: |
| letter = nletter |
| group = [] |
| letter_groups.append((letter, group)) |
| append = group.append |
| append(node) |
| return letter_groups |
| |
| |
| def group_symbols(groups): |
| entries = [] |
| ident_letters = string.ascii_letters + "_" |
| while groups[0][0] not in ident_letters: |
| entries += groups[0][1] |
| del groups[0] |
| if entries: |
| groups.insert(0, ("Symbols", entries)) |
| |
| |
| # need a function to separate the nodes into columns... |
| def split_columns(nodes, columns=1): |
| if columns <= 1: |
| return [nodes] |
| # This is a rough height; we may have to increase to avoid breaks before |
| # a subitem. |
| colheight = int(len(nodes) / columns) |
| numlong = int(len(nodes) % columns) |
| if numlong: |
| colheight = colheight + 1 |
| else: |
| numlong = columns |
| cols = [] |
| for i in range(numlong): |
| start = i * colheight |
| end = start + colheight |
| cols.append(nodes[start:end]) |
| del nodes[:end] |
| colheight = colheight - 1 |
| try: |
| numshort = int(len(nodes) / colheight) |
| except ZeroDivisionError: |
| cols = cols + (columns - len(cols)) * [[]] |
| else: |
| for i in range(numshort): |
| start = i * colheight |
| end = start + colheight |
| cols.append(nodes[start:end]) |
| # |
| # If items continue across columns, make sure they are marked |
| # as continuations so the user knows to look at the previous column. |
| # |
| for i in range(len(cols) - 1): |
| try: |
| prev = cols[i][-1] |
| next = cols[i + 1][0] |
| except IndexError: |
| return cols |
| else: |
| n = min(len(prev.key), len(next.key)) |
| for j in range(n): |
| if prev.key[j] != next.key[j]: |
| break |
| next.continuation = j + 1 |
| return cols |
| |
| |
| DL_LEVEL_INDENT = " " |
| |
| def format_column(nodes): |
| strings = ["<dl compact='compact'>"] |
| append = strings.append |
| level = 0 |
| previous = [] |
| for node in nodes: |
| current = node.text |
| count = 0 |
| for i in range(min(len(current), len(previous))): |
| if previous[i] != current[i]: |
| break |
| count = i + 1 |
| if count > level: |
| append("<dl compact='compact'>" * (count - level) + "\n") |
| level = count |
| elif level > count: |
| append("\n") |
| append(level * DL_LEVEL_INDENT) |
| append("</dl>" * (level - count)) |
| level = count |
| # else: level == count |
| for i in range(count, len(current) - 1): |
| term = node.text[i] |
| level = level + 1 |
| if node.continuation > i: |
| extra = " (continued)" |
| else: |
| extra = "" |
| append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>" |
| % (term, extra, level * DL_LEVEL_INDENT)) |
| append("\n%s<dt>%s%s</a>" |
| % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1])) |
| for link in node.links[1:]: |
| append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link)) |
| previous = current |
| append("\n") |
| append("</dl>" * (level + 1)) |
| return null_join(strings) |
| |
| |
| def format_nodes(nodes, columns=1): |
| strings = [] |
| append = strings.append |
| if columns > 1: |
| colnos = range(columns) |
| colheight = int(len(nodes) / columns) |
| if len(nodes) % columns: |
| colheight = colheight + 1 |
| colwidth = int(100 / columns) |
| append('<table width="100%"><tr valign="top">') |
| for col in split_columns(nodes, columns): |
| append('<td width="%d%%">\n' % colwidth) |
| append(format_column(col)) |
| append("\n</td>") |
| append("\n</tr></table>") |
| else: |
| append(format_column(nodes)) |
| return null_join(strings) |
| |
| |
| def format_letter(letter): |
| if letter == '.': |
| lettername = ". (dot)" |
| elif letter == '_': |
| lettername = "_ (underscore)" |
| else: |
| lettername = letter.capitalize() |
| return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" \ |
| % (quoteattr("letter-" + letter), lettername) |
| |
| |
| def format_html_letters(nodes, columns, group_symbol_nodes): |
| letter_groups = split_letters(nodes) |
| if group_symbol_nodes: |
| group_symbols(letter_groups) |
| items = [] |
| for letter, nodes in letter_groups: |
| s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter) |
| items.append(s) |
| s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)] |
| for letter, nodes in letter_groups: |
| s.append(format_letter(letter)) |
| s.append(format_nodes(nodes, columns)) |
| return null_join(s) |
| |
| def format_html(nodes, columns): |
| return format_nodes(nodes, columns) |
| |
| |
| def collapse(nodes): |
| """Collapse sequences of nodes with matching keys into a single node. |
| Destructive.""" |
| if len(nodes) < 2: |
| return |
| prev = nodes[0] |
| i = 1 |
| while i < len(nodes): |
| node = nodes[i] |
| if not node.cmp_entry(prev): |
| prev.links.append(node.links[0]) |
| del nodes[i] |
| else: |
| i = i + 1 |
| prev = node |
| |
| |
| def dump(nodes, fp): |
| for node in nodes: |
| fp.write(node.dump()) |
| |
| |
| def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0): |
| nodes.sort() |
| collapse(nodes) |
| if letters: |
| return format_html_letters(nodes, columns, group_symbol_nodes) |
| else: |
| return format_html(nodes, columns) |
| |
| |
| def main(): |
| import getopt |
| ifn = "-" |
| ofn = "-" |
| columns = 1 |
| letters = 0 |
| group_symbol_nodes = 1 |
| opts, args = getopt.getopt(sys.argv[1:], "c:lo:", |
| ["columns=", "dont-group-symbols", |
| "group-symbols", "letters", "output="]) |
| for opt, val in opts: |
| if opt in ("-o", "--output"): |
| ofn = val |
| elif opt in ("-c", "--columns"): |
| columns = int(val, 10) |
| elif opt in ("-l", "--letters"): |
| letters = 1 |
| elif opt == "--group-symbols": |
| group_symbol_nodes = 1 |
| elif opt == "--dont-group-symbols": |
| group_symbol_nodes = 0 |
| if not args: |
| args = [ifn] |
| nodes = [] |
| for fn in args: |
| nodes = nodes + load(open(fn)) |
| num_nodes = len(nodes) |
| html = process_nodes(nodes, columns, letters, group_symbol_nodes) |
| program = os.path.basename(sys.argv[0]) |
| if ofn == "-": |
| sys.stdout.write(html) |
| sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes)) |
| else: |
| open(ofn, "w").write(html) |
| print |
| print "%s: %d index nodes" % (program, num_nodes) |
| |
| |
| if __name__ == "__main__": |
| main() |