#! /usr/bin/env python

"""Build a nested HTML index from index-entry data.

Each input line holds a link, the character chr(1), the entry text,
the marker '###' and a sequence number.  Entries are loaded into Node
objects, sorted, merged when they refer to the same entry, and written
out as nested <dl> lists.

Command line options (see main()):
    -c N, --columns N    lay the index out in N table columns
    -l, --letters        group the entries under per-letter headings
    -o FILE, --output FILE
                         write to FILE instead of standard output
"""
__version__ = '$Revision$'

import re
import string
import sys


def _three_way(a, b):
    """Return -1, 0 or 1 as a sorts before, equal to, or after b."""
    return (a > b) - (a < b)


class Node:
    """One index entry.

    Attributes:
        links -- list of link strings that point at this entry
        seqno -- sequence number, stored exactly as it was passed in
        text  -- list of display strings, one per '!'-separated level
        key   -- list of sort keys, one per '!'-separated level
    """

    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        entry = self.__rmjunk.sub('', str)
        # <tt>...</tt> markup is stripped by the two split helpers.
        self.text = split_entry_text(entry)
        self.key = split_entry_key(entry)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        # NOTE(review): seqno is compared as stored; if it is a string
        # (load() passes one) "10" sorts before "9" -- confirm intent.
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    # Python 3 ignores __cmp__, so expose the same ordering through the
    # rich comparison protocol as well.
    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __le__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        # Compare level by level: the sort key decides first, the display
        # text breaks ties.
        levels = zip(self.key, other.key, self.text, other.text)
        for key1, key2, text1, text2 in levels:
            c = cmp_part(key1, key2) or cmp_part(text1, text2)
            if c:
                return c
        # All shared levels match; fall back to whole-list comparison so
        # that the entry with fewer levels sorts first.
        return (_three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one line in the format read by load()."""
        return "%s\x01%s###%s\n" % ("\x01".join(self.links),
                                    '!'.join(self.text),
                                    self.seqno)


def cmp_part(s1, s2):
    """Compare two strings, ignoring case unless that leaves a tie.

    Returns 0 only when the strings are identical.  Otherwise the
    lowercased strings decide the order (so a string sorts before any
    longer string it is a case-insensitive prefix of), and strings that
    differ only in case are ordered by their exact spelling.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if l1 != l2:
        return -1 if l1 < l2 else 1
    # Same letters, different case: use the raw strings as tie-breaker.
    return -1 if s1 < s2 else 1


def split_entry(str, which):
    """Split an index entry into one string per '!'-separated level.

    Each level may itself contain '@'-separated fields.  For a level
    with more than one field, field number `which` is returned; a level
    without '@' is returned whole.
    """
    stuff = []
    for level in str.split('!'):
        fields = level.split('@')
        if len(fields) != 1:
            stuff.append(fields[which])
        else:
            stuff.append(fields[0])
    return stuff


# Matches text containing a <tt>...</tt> pair; groups 1-3 are the text
# before, inside and after the pair, so joining them drops the markup.
_rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
# Matches a literal "()".
_rmparens = re.compile(r"\(\)")

def split_entry_key(str):
    """Return the list of sort keys for an index entry, one per level.

    For every level produced by split_entry(str, 1), one <tt>...</tt>
    pair and any '()' are removed, and the result is lowercased with
    leading ignored letters trimmed by trim_ignored_letters().
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        # trim_ignored_letters() also lowercases, so no separate
        # lowercase step is needed for parts without <tt> markup.
        keys.append(trim_ignored_letters(part))
    return keys


def split_entry_text(str):
    """Return the list of display strings for an index entry.

    One <tt>...</tt> pair is removed from the entry as a whole (its
    contents are kept) before the entry is split with split_entry().
    """
    text = str
    if '<' in text:
        m = _rmtt.match(text)
        if m:
            text = ''.join(m.group(1, 2, 3))
    return split_entry(text, 1)


def load(fp):
    """Read index entries from the open file `fp` and return Node objects.

    A line is accepted when it has the form written by Node.dump():
    link, the character chr(1), entry text, '###', sequence number.
    Lines that do not match are skipped.  The sequence number is kept
    as the string that was read.
    """
    nodes = []
    # \x01 is the literal separator character, not a group reference.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    while 1:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, text, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, text, seqno))
    return nodes


# Skip a leading '$' so that environment variables are indexed under the
# first letter of their name.
SKIP_LETTERS = "$"

def trim_ignored_letters(s):
    """Return `s` lowercased, without leading SKIP_LETTERS characters.

    An empty string, or one made up only of skipped characters, yields
    '' instead of raising IndexError.
    """
    return s.lower().lstrip(SKIP_LETTERS)

def get_first_letter(s):
    """Return the lowercase first letter of `s` after trimming ignored
    leading letters, or '' when nothing is left."""
    # trim_ignored_letters() already lowercases; slicing avoids an
    # IndexError on an empty result.
    return trim_ignored_letters(s)[:1]


def split_letters(nodes):
    """Group consecutive nodes that share a first letter.

    Returns a list of (letter, nodes) tuples in input order, where the
    letter comes from get_first_letter() applied to the first level of
    each node's text.  An empty input gives an empty list.
    """
    letter_groups = []
    for node in nodes:
        letter = get_first_letter(node.text[0])
        # Open a new group whenever the letter changes.
        if not letter_groups or letter_groups[-1][0] != letter:
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups


def split_columns(nodes, columns=1):
    """Separate `nodes` into `columns` consecutive slices.

    With columns <= 1 the result is [nodes] (the same list object).
    Otherwise a list of exactly `columns` new lists is returned; when
    the length does not divide evenly the leading columns are one item
    longer, and surplus columns are empty.  `nodes` is not modified.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks
    # before a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    cols = []
    start = 0
    for i in range(columns):
        end = start + colheight
        if i < numlong:
            end = end + 1
        cols.append(nodes[start:end])
        start = end
    return cols


# Indentation emitted per nesting level of <dl>.
# NOTE(review): runs of spaces inside the string literals of this block
# may have been collapsed when the source was extracted; confirm them
# against the repository copy.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Return the HTML for one column of index entries.

    `nodes` must be ordered so that entries sharing leading text levels
    are adjacent.  Every level above an entry's last one is written as a
    <dt> term followed by a nested <dl compact>; the last level is
    written after the entry's first link, and any further links follow
    as "[Link]" anchors.  Link strings and text are inserted verbatim.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0           # number of nested lists currently open
    previous = []       # text levels of the entry written last
    for node in nodes:
        current = node.text
        # count = number of leading levels shared with the last entry.
        count = 0
        for before, now in zip(previous, current):
            if before != now:
                break
            count = count + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # Close the lists that belong to levels no longer shared.
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open a term and a nested list for each new intermediate level.
        for term in current[count:-1]:
            level = level + 1
            append("\n<dt>%s\n<dd>\n%s<dl compact>"
                   % (term, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    append("</dl>" * (level + 1))
    return ''.join(strings)


def format_nodes(nodes, columns=1):
    """Return the HTML for `nodes`, followed by a paragraph break.

    With columns > 1 the entries are divided by split_columns() and
    each part is formatted by format_column() inside one cell of a
    single-row table; otherwise one format_column() list is produced.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Whole-number percentage width for every cell.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)


def format_letter(letter):
    """Return the HTML heading that starts the section for `letter`.

    The heading carries the anchor name "letter-<letter>".  '.' and '_'
    are spelled out in the visible text; any other letter is shown in
    upper case.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)


def format_html_letters(nodes, columns=1):
    """Return the index HTML grouped under per-letter headings.

    The output starts with a centered row of links to each letter's
    section; each section is a format_letter() heading followed by the
    format_nodes() output for the group found by split_letters().
    """
    letter_groups = split_letters(nodes)
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    s = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return ''.join(s)

def format_html(nodes, columns=1):
    """Return the index HTML without letter headings.

    This is a thin wrapper around format_nodes(); `columns` defaults to
    1 like the other formatting functions.
    """
    return format_nodes(nodes, columns)


def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    Nodes are compared with cmp_entry(); of each run of equal nodes only
    the first is kept, and the first link of every later node in the run
    is appended to its links.  Destructive: `nodes` is updated in place
    and nothing is returned.
    """
    if len(nodes) < 2:
        return
    kept = [nodes[0]]
    for node in nodes[1:]:
        if node.cmp_entry(kept[-1]):
            kept.append(node)
        else:
            kept[-1].links.append(node.links[0])
    # One slice assignment instead of repeated deletion from the middle
    # of the list.
    nodes[:] = kept


def dump(nodes, fp):
    """Write the dump() line of every node in `nodes` to the file `fp`."""
    for node in nodes:
        fp.write(node.dump())


def main():
    """Command line entry point.

    Reads index entries from the files named on the command line (or
    standard input when none are given or a name is "-"), sorts and
    collapses them, and writes the HTML to standard output or to the
    file given with -o/--output.  -c/--columns sets the column count
    and -l/--letters selects per-letter grouping.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output.
            nodes = nodes + load(sys.stdin)
        else:
            with open(fn) as fp:
                nodes = nodes + load(fp)
    nodes.sort()
    collapse(nodes)
    if letters:
        html = format_html_letters(nodes, columns)
    else:
        html = format_html(nodes, columns)
    if ofn == "-":
        sys.stdout.write(html)
    else:
        with open(ofn, "w") as fp:
            fp.write(html)


# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()