Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
| 2 | |
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 3 | __version__ = '$Revision$' |
| 4 | |
Fred Drake | 03a0235 | 1998-12-28 20:46:53 +0000 | [diff] [blame] | 5 | import os |
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 6 | import re |
| 7 | import string |
| 8 | import sys |
| 9 | |
| 10 | |
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* orders before, equal to or after *b*."""
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
        links: list of link strings for this entry; starts with the one
            link given to the constructor.
        seqno: the sequence number exactly as passed in (not converted).
        text: list of display strings, one per '!'-separated level.
        key: list of sort keys, one per '!'-separated level.
    """

    # Raw string so the ``\d`` escape reaches the regex engine unchanged.
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Build a node from a link, the raw entry string and a sequence
        number."""
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return (self.cmp_entry(other)
                or _three_way(self.seqno, other.seqno))

    # Rich comparisons delegate to __cmp__ so that sorting also works on
    # interpreters that never call __cmp__.
    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number.

        Compares level by level (key first, then text) over the levels
        both entries have, then falls back to comparing the whole key and
        text lists.  Returns a negative, zero or positive number.
        """
        c = 0
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        return (c
                or _three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the one-line serialized form of this node.

        The links are joined with the \\x01 control character, followed by
        another \\x01, the '!'-joined text, '###', the sequence number and
        a newline.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
| 49 | |
| 50 | |
def cmp_part(s1, s2):
    """Three-way compare of two strings, ignoring case first.

    The lowercased strings are compared first, so a string that is a
    case-insensitive prefix of the other sorts before it.  Only when the
    lowercased forms are equal does the exact comparison break the tie.

    Returns -1, 0 or 1.
    """
    def three_way(a, b):
        return (a > b) - (a < b)

    return three_way(s1.lower(), s2.lower()) or three_way(s1, s2)
| 65 | |
| 66 | |
def split_entry(str, which):
    """Split an index entry string into one item per level.

    The entry is split on '!' into levels.  Each level is then split on
    '@'; a level that contains '@' contributes the piece at index
    *which*, and a level without '@' contributes itself.

    Returns a list of strings, one per level.
    """
    stuff = []
    for part in str.split('!'):
        entry = part.split('@')
        if len(entry) != 1:
            stuff.append(entry[which])
        else:
            stuff.append(entry[0])
    return stuff
| 78 | |
| 79 | |
# Matches a string containing a <tt>...</tt> element (optionally with a
# class attribute); groups 1, 2 and 3 are the text before the element,
# inside it, and after it.  Case-insensitive.
_rmtt = re.compile(r"(.*)<tt(?: class=[a-z0-9]+)?>(.*)</tt>(.*)$",
                   re.IGNORECASE)
# Matches an empty pair of parentheses, "()".
_rmparens = re.compile(r"\(\)")
| 83 | |
def split_entry_key(str):
    """Return the list of sort keys for an index entry string.

    Each level produced by split_entry() has its <tt> markup removed (or
    is lowercased when it has none), loses any '()' and is finally passed
    through trim_ignored_letters().

    Always returns a real list, so callers may take its length and index
    into it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        keys.append(_rmparens.sub('', part))
    return [trim_ignored_letters(key) for key in keys]
| 95 | |
| 96 | |
def split_entry_text(str):
    """Return the list of display strings for an index entry string.

    When the string contains '<' and matches the <tt> pattern, the
    <tt>...</tt> tags are removed (their content is kept) before the
    string is split into levels by split_entry().
    """
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)
| 103 | |
| 104 | |
def load(fp):
    """Read index records from *fp* and return them as a list of Nodes.

    Lines are read with fp.readline() until it returns an empty value.
    Lines that do not match the record pattern are skipped.
    """
    # "\1" in this non-raw literal is the \x01 control character, not a
    # backreference; it is the same separator Node.dump() writes.
    rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        m = rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
        line = fp.readline()
    return nodes
| 117 | |
| 118 | |
def trim_ignored_letters(s):
    """Return *s* lowercased, without a leading '$' if it has one.

    An empty string is returned unchanged instead of raising IndexError.
    """
    # ignore $ to keep environment variables with the
    # leading letter from the name
    s = s.lower()
    if s.startswith("$"):
        return s[1:]
    return s
Fred Drake | 058068d | 1998-04-08 23:12:51 +0000 | [diff] [blame] | 127 | |
def get_first_letter(s):
    """Return the lowercased first character of *s* after trimming.

    The string is passed through trim_ignored_letters() first.  If
    nothing is left (for example *s* is "$"), the empty string is
    returned instead of raising IndexError.
    """
    return trim_ignored_letters(s)[:1].lower()
| 130 | |
| 131 | |
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of (letter, nodes) tuples in input order.  A new group
    starts whenever the letter differs from the previous node's letter,
    so the input is expected to arrive already ordered.
    """
    letter_groups = []
    members = None
    current = None
    for node in nodes:
        initial = get_first_letter(node.text[0])
        if members is None or initial != current:
            current = initial
            members = []
            letter_groups.append((current, members))
        members.append(node)
    return letter_groups
| 148 | |
| 149 | |
def split_columns(nodes, columns=1):
    """Split the list *nodes* into *columns* consecutive slices.

    With columns <= 1 the result is ``[nodes]`` (the same list object).
    Otherwise the first columns are one item longer than the rest when
    the length does not divide evenly, and missing columns are padded
    with empty lists.  The *nodes* list itself is not modified.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    for i in range(numlong):
        start = i * colheight
        cols.append(nodes[start:start + colheight])
    # Work on a slice of what is left rather than deleting from the
    # caller's list.
    rest = nodes[numlong * colheight:]
    colheight = colheight - 1
    if colheight > 0:
        for i in range(len(rest) // colheight):
            start = i * colheight
            cols.append(rest[start:start + colheight])
    else:
        # Nothing left to distribute; pad with distinct empty columns.
        for i in range(columns - len(cols)):
            cols.append([])
    return cols
Fred Drake | 7cbf462 | 1998-08-07 19:50:13 +0000 | [diff] [blame] | 179 | |
| 180 | |
# Indentation emitted once per nesting level in the generated HTML.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Return the HTML for one column of index entries.

    The entries are written as nested ``<dl compact>`` lists.  For each
    node, the number of leading text levels it shares with the previous
    node decides how many lists are opened or closed before the entry is
    written.  Levels above the last one are written as plain ``<dt>``
    terms; the last level is written after the node's first link and
    followed by ``</a>``.  Each further link gets a "[Link]" anchor.

    NOTE(review): the entries of node.links are presumably opening
    ``<a ...>`` tags, since only the closing ``</a>`` is written here.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # count = number of leading levels shared with the previous entry
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            append("\n<dt>%s\n<dd>\n%s<dl compact>"
                   % (term, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # close every list still open, plus the outermost one
    append("</dl>" * (level + 1))
    return ''.join(strings)
| 217 | |
| 218 | |
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With more than one column the nodes are divided by split_columns()
    and each part becomes one cell of a single-row table, with each cell
    given an equal whole-number percentage width.  Otherwise the nodes
    are formatted as a single column.  The result always ends with a
    paragraph break.
    """
    strings = []
    append = strings.append
    if columns > 1:
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 238 | |
| 239 | |
def format_letter(letter):
    """Return the HTML section heading for one index letter.

    The heading carries an anchor named "letter-<letter>".  '.' and '_'
    get a spelled-out label; any other letter is shown uppercased.
    """
    special_names = {
        '.': ". (dot)",
        '_': "_ (underscore)",
    }
    lettername = special_names.get(letter)
    if lettername is None:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
| 249 | |
| 250 | |
def format_html_letters(nodes, columns=1):
    """Return the index HTML with one section per first letter.

    The output starts with a centered bar of links, one per letter
    group, followed by each group's heading and its formatted nodes.
    """
    letter_groups = split_letters(nodes)
    items = []
    # The loop variables are named so they do not rebind the *nodes*
    # parameter.
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    s = ["<hr><center>\n%s</center>\n" % (" |\n".join(items),)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return ''.join(s)
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 262 | |
def format_html(nodes, columns):
    """Return the index HTML without per-letter sections.

    This simply hands both arguments to format_nodes().
    """
    html = format_nodes(nodes, columns)
    return html
| 265 | |
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 266 | |
def collapse(nodes):
    """Merge each run of entries that compare equal into its first node.

    Two neighbours belong to the same run when cmp_entry() reports no
    difference.  The first link of every merged node is appended to the
    surviving node's links.  The list is changed in place and nothing is
    returned.
    """
    if len(nodes) < 2:
        return
    survivors = [nodes[0]]
    for candidate in nodes[1:]:
        anchor = survivors[-1]
        if candidate.cmp_entry(anchor):
            # differs from the last kept entry: keep it
            survivors.append(candidate)
        else:
            anchor.links.append(candidate.links[0])
    nodes[:] = survivors
| 282 | |
| 283 | |
def dump(nodes, fp):
    """Write the dump() text of every node to *fp*, in list order."""
    for entry in nodes:
        record = entry.dump()
        fp.write(record)
| 287 | |
| 288 | |
def process_nodes(nodes, columns, letters):
    """Sort and collapse *nodes* in place, then return their HTML.

    A true *letters* selects the per-letter layout; otherwise the plain
    layout is used.
    """
    nodes.sort()
    collapse(nodes)
    if letters:
        formatter = format_html_letters
    else:
        formatter = format_html
    return formatter(nodes, columns)
| 296 | |
| 297 | |
def main():
    """Command-line entry point.

    Options:
        -c N, --columns=N   number of columns (default 1)
        -l, --letters       split the index into per-letter sections
        -o FILE, --output=FILE
                            write the HTML to FILE; "-" (the default)
                            means standard output

    The remaining arguments are input files; "-" (the default when none
    are given) means standard input.  A one-line node count is written to
    stderr when the HTML goes to stdout, and to stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # Treat "-" as standard input, mirroring the "-" output
            # convention below, instead of opening a file named "-".
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    # Counted before process_nodes() collapses duplicate entries.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
Fred Drake | ec56109 | 1998-03-27 05:25:43 +0000 | [diff] [blame] | 328 | |
| 329 | |
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()