Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame^] | 1 | #! /usr/bin/env python |
| 2 | |
"""Build an HTML index from a file of index entry records.

When run as a script, the file named by the first command-line argument is
loaded into Node objects, the nodes are sorted and written to
``<file>.dump-1``, entries that compare equal are collapsed into one node
and written to ``<file>.dump-2``, and the formatted HTML is written to
standard output.
"""
__version__ = '$Revision$'
| 6 | |
| 7 | import re |
| 8 | import string |
| 9 | import sys |
| 10 | |
| 11 | |
def _compare(a, b):
    """Three-way comparison: negative, zero or positive, like the old cmp()."""
    return (a > b) - (a < b)


class Node:
    """One index entry: its link targets, display text and sort key.

    The entry string is a sequence of levels separated by '!'.  Each level
    is either ``text`` or ``sortkey@text``.

    Attributes set by the constructor:
      links -- list of link strings (initially just the one passed in)
      seqno -- the sequence value passed in, stored unchanged
      text  -- list of display strings, one per level (markup retained)
      key   -- list of sort keys, one per level (lowercased, <tt> removed)
    """

    # One <tt>...</tt> pair; the leading group is greedy, so the *last*
    # pair in the string is the one matched.
    __rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Create a node for entry string *str* pointing at *link*.

        *str* shadows the builtin, but the name is part of the existing
        signature and is therefore kept.
        """
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        source = self.__rmjunk.sub('', str)
        # The text keeps its markup; the key is built from a lowercased
        # copy with the <tt> markup removed.  Both are always split
        # separately so that an explicit "sortkey@" prefix is honoured even
        # when the two source strings happen to be identical.
        self.text = self._split_entries(source, -1)
        self.key = self._split_entries(self._strip_tt(source).lower(), 0)

    def _strip_tt(self, source):
        """Return *source* with every <tt>...</tt> pair removed (contents stay)."""
        if '<' not in source:
            return source
        m = self.__rmtt.match(source)
        # Each successful match removes one pair, so this terminates.
        while m:
            source = ''.join(m.group(1, 2, 3))
            m = self.__rmtt.match(source)
        return source

    def _split_entries(self, source, which):
        """Split *source* on '!' and pick one side of each 'key@text' level.

        *which* is 0 for the sort key or -1 for the display text.  Only the
        first '@' separates key from text, so text containing '@' is kept
        intact instead of raising ValueError.
        """
        fields = []
        for part in source.split('!'):
            pieces = part.split('@', 1)
            fields.append(pieces[which])
        return fields

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        # NOTE(review): seqno is compared as stored; load() passes the text
        # captured from the data file, so ties order lexicographically --
        # confirm that is intended.
        return self.cmp_entry(other) or _compare(self.seqno, other.seqno)

    # Rich comparisons delegate to __cmp__ so that sorting and equality also
    # work on interpreters that no longer consult __cmp__.
    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    # Value-based equality without a matching hash: keep nodes unhashable.
    __hash__ = None

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        # Compare level by level: sort key first, display text as tie-break.
        for skey, stext, okey, otext in zip(self.key, self.text,
                                            other.key, other.text):
            c = _compare(skey, okey) or _compare(stext, otext)
            if c:
                return c
        # All shared levels are equal; the full key lists decide
        # (a shorter entry sorts before its sub-entries).
        return _compare(self.key, other.key)

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the one-line record for this node: the links joined by
        NUL, a NUL, the '!'-joined text, '###', the seqno and a newline."""
        return "%s\0%s###%s\n" \
               % ("\0".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
| 83 | |
| 84 | |
def load(fp):
    """Read index records from the open file *fp* and return a list of Nodes.

    A line is accepted when it has the shape ``link NUL text ### seqno``;
    the three captured fields are handed to Node() in that order.  Lines
    that do not match are skipped.  Reading stops at the first empty result
    from ``fp.readline()``.
    """
    record = re.compile(r"(.*)\0(.*)###(.*)$")
    loaded = []
    line = fp.readline()
    while line:
        found = record.match(line)
        if found is not None:
            link, entry, seqno = found.group(1, 2, 3)
            loaded.append(Node(link, entry, seqno))
        line = fp.readline()
    return loaded
| 97 | |
| 98 | |
def split_letters(nodes):
    """Partition *nodes* into runs that share the first character of key[0].

    Returns a list of ``(letter, [node, ...])`` tuples in input order.  Only
    adjacent nodes are grouped, so the input is expected to be sorted; an
    empty input yields an empty list.
    """
    letter_groups = []
    for entry in nodes:
        initial = entry.key[0][0]
        # Open a new run whenever the initial differs from the current one.
        if not letter_groups or letter_groups[-1][0] != initial:
            letter_groups.append((initial, []))
        letter_groups[-1][1].append(entry)
    return letter_groups
| 115 | |
| 116 | |
def format_nodes(nodes):
    """Render *nodes* as nested ``<dl compact>`` lists and return the HTML.

    Each node contributes one line built from its first link and the text
    of its deepest level; any further links of the same node are appended
    as ", [Link]" anchors.  Sub-entries open a nested list, preceded by the
    parent's text when the parent differs from the previous node's.
    """
    # NOTE(review): when stepping back to a shallower level under a different
    # parent, this emits one more "</dl>" than was opened; behaviour is kept
    # as-is here -- confirm whether the generated HTML should be balanced.
    depth = 0
    out = ["<dl compact>"]
    last = None
    for entry in nodes:
        entry_depth = len(entry.key) - 1
        if entry_depth > depth:
            # Going deeper: repeat the parent heading only if it changed.
            if last is not None and entry.key[depth] == last.key[depth]:
                out.append("<dl compact>")
            else:
                out.append("%s\n<dl compact>" % entry.text[depth])
            depth = entry_depth
        elif entry_depth < depth:
            # Coming back up: close the lists we are leaving.
            out.append("</dl>" * (depth - entry_depth))
            depth = entry_depth
            if last is None or entry.key[depth] == last.key[depth]:
                out.append("<dl compact>")
            else:
                out.append("</dl>")
        elif depth and entry.key[depth - 1] != last.key[depth - 1]:
            # Same depth, new parent: restart the nested list.
            out.append("</dl>\n%s<dl compact>" % entry.text[depth - 1])
        out.append("%s%s</a><br>" % (entry.links[0], entry.text[-1]))
        for extra in entry.links[1:]:
            # Replace the trailing "<br>" of the previous line with a comma.
            out[-1] = out[-1][:-4] + ","
            out.append(extra + "[Link]</a><br>")
        last = entry
    out.extend(["</dl>" * (depth + 1), "", ""])
    return "\n".join(out)
| 152 | |
| 153 | |
def format_letter(letter):
    """Return the HTML section heading (rule plus named anchor) for *letter*.

    '.' and '_' get a spelled-out label; any other letter is shown
    uppercased.  The anchor name always uses *letter* unchanged.
    """
    spelled_out = {'.': ". (dot)", '_': "_ (underscore)"}
    if letter in spelled_out:
        label = spelled_out[letter]
    else:
        label = letter.upper()
    return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" % (letter, label)
| 163 | |
| 164 | |
def format_html(nodes):
    """Return the complete index HTML for the sorted list *nodes*.

    The output starts with a centred bar of links to each letter section,
    followed, for every letter group, by its heading and its formatted
    entries.
    """
    letter_groups = split_letters(nodes)
    jump_links = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
                  for letter, members in letter_groups]
    pieces = ["<hr><center>\n%s</center>\n" % " |\n".join(jump_links)]
    for letter, members in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(members))
    return "".join(pieces)
| 175 | |
| 176 | |
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    Adjacent nodes for which ``cmp_entry`` reports equality are merged into
    the first node of the run: every link of each absorbed node is appended
    to that node's ``links`` and the absorbed node is removed from *nodes*.
    The list is modified in place; nothing is returned.
    """
    if len(nodes) < 2:
        return
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            # Carry over all links, not just the first, so nothing is lost
            # when the absorbed node already holds several targets.
            prev.links.extend(node.links)
    # Single slice assignment keeps the caller's list object while avoiding
    # repeated deletions from the middle of the list.
    nodes[:] = kept
| 193 | |
| 194 | |
def dump(nodes, fp):
    """Write the ``dump()`` record of every node in *nodes* to *fp*, in order."""
    emit = fp.write
    for entry in nodes:
        emit(entry.dump())
| 198 | |
| 199 | |
def _dump_to(nodes, filename):
    """Write the dump records of *nodes* to *filename*, closing it afterwards."""
    fp = open(filename, "w")
    try:
        dump(nodes, fp)
    finally:
        fp.close()


def main():
    """Build the HTML index for the data file named by ``sys.argv[1]``.

    The sorted nodes are written to ``<file>.dump-1``, the collapsed nodes
    to ``<file>.dump-2``, and the formatted HTML to standard output.  Exits
    with status 2 when no file name was given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("error: expected the index data file name "
                         "as the first argument\n")
        sys.exit(2)
    fn = sys.argv[1]
    # Close every file explicitly instead of relying on garbage collection,
    # so the dump files are complete on disk before the HTML is produced.
    fp = open(fn)
    try:
        nodes = load(fp)
    finally:
        fp.close()
    nodes.sort()
    _dump_to(nodes, fn + ".dump-1")
    collapse(nodes)
    _dump_to(nodes, fn + ".dump-2")
    sys.stdout.write(format_html(nodes))
| 208 | |
| 209 | |
# Run the index builder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()