blob: 0e5ba84a1181579e9432cb269fea5827322d4f32 [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
Fred Drakeec561091998-03-27 05:25:43 +00003__version__ = '$Revision$'
4
Fred Drake071972e2002-10-16 15:30:17 +00005import os.path
Fred Drakeec561091998-03-27 05:25:43 +00006import re
7import string
8import sys
9
Fred Drake39724572003-11-25 16:21:00 +000010from xml.sax.saxutils import quoteattr
11
Fred Drakeec561091998-03-27 05:25:43 +000012
# Shorthand joiners used throughout the module.
bang_join = "!".join
null_join = "".join

# (compiled pattern, replacement text) pairs applied, in order, to every
# raw index entry before it is split into text and key parts.
REPLACEMENTS = [
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # LaTeX macros that simply stand for a fixed piece of text.
        (r"\\ABC\b", "ABC"),
        (r"\\ASCII\b", "ASCII"),
        (r"\\Cpp\b", "C++"),
        (r"\\EOF\b", "EOF"),
        (r"\\NULL\b", "NULL"),
        (r"\\POSIX\b", "POSIX"),
        (r"\\UNIX\b", "Unix"),
        # Residue left behind by LaTeX2HTML.
        (r"<#\d+#>", ""),
    )
]
Fred Drake3d422662001-12-26 19:55:14 +000028
class Node:
    """A single index entry.

    Attributes set by the constructor:
      links -- list of link strings pointing at this entry (starts with one)
      seqno -- sequence number exactly as passed in by the caller
      text  -- list of display-text parts, from split_entry_text()
      key   -- list of sort-key parts, from split_entry_key()
    """

    # Class-level default; split_columns() overrides it per instance.
    continuation = 0

    # Instances define __eq__ and are mutable, so they are not hashable.
    __hash__ = None

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        for pattern, replacement in REPLACEMENTS:
            str = pattern.sub(replacement, str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = list(split_entry_key(str))

    @staticmethod
    def _cmp(a, b):
        """Three-way comparison returning -1, 0 or 1.

        Stands in for the cmp() builtin, which Python 3 no longer has.
        """
        return (a > b) - (a < b)

    def __cmp__(self, other):
        """Three-way comparison that includes the sequence number.

        Python 3 never calls this implicitly; the rich comparison methods
        below delegate to it so that list.sort() keeps working.
        """
        return self.cmp_entry(other) or self._cmp(self.seqno, other.seqno)

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __le__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number.

        Compares key parts, then text parts, position by position using
        cmp_part(); falls back to comparing the whole key and text lists
        when every shared position is equal.
        """
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                return c
        return (self._cmp(self.key, other.key)
                or self._cmp(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ("!".join(self.text), self.seqno)

    def __str__(self):
        return "!".join(self.key)

    def dump(self):
        """Return the one-line serialized form of this node.

        Links and text are separated by the control character 0x01; the
        sequence number follows '###'.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  "!".join(self.text),
                  self.seqno)
76
77
def cmp_part(s1, s2):
    """Three-way compare two entry parts, returning -1, 0 or 1.

    Identical strings compare equal.  Otherwise a string that is a
    case-insensitive proper prefix of the other sorts first; failing that,
    the lowercased strings are compared, with the original strings used as
    a tie-breaker.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    # (a > b) - (a < b) is the three-way comparison; Python 3 has no cmp().
    return ((l1 > l2) - (l1 < l2)) or ((s1 > s2) - (s1 < s2))
92
93
def split_entry(str, which):
    """Split an index entry into its '!'-separated parts.

    Each part is further split on '@'.  A part without '@' is used as is;
    otherwise the piece at index *which* is selected.  Returns the list of
    selected pieces.
    """
    def pick(fields):
        return fields[0] if len(fields) == 1 else fields[which]

    return [pick(part.split('@')) for part in str.split('!')]
105
106
# Matches text containing a <tt>...</tt> element (optionally with a class
# attribute), capturing what precedes it, its content, and what follows.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches an empty pair of parentheses.
_rmparens = re.compile(r"\(\)")


def split_entry_key(str):
    """Return the sort-key parts of an index entry.

    Each part has a <tt> wrapper removed (or is lowercased when there is
    none), has any '()' removed, and is finally passed through
    trim_ignored_letters().  The result is the map() of that last step.
    """
    cleaned = []
    for part in split_entry(str, 1):
        found = _rmtt.match(part)
        if found:
            part = null_join(found.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        cleaned.append(_rmparens.sub('', part))
    return map(trim_ignored_letters, cleaned)
122
123
def split_entry_text(str):
    """Return the display-text parts of an index entry.

    When the entry contains a <tt>...</tt> element matched by _rmtt, the
    tags are dropped before the entry is split with split_entry().
    """
    found = _rmtt.match(str) if '<' in str else None
    if found:
        str = null_join(found.group(1, 2, 3))
    return split_entry(str, 1)
130
131
def load(fp):
    """Read index records from *fp* and return them as a list of Nodes.

    A record line holds a link, the control character 0x01, the entry
    text, '###' and a sequence number.  Lines that do not match are
    skipped.  Reading stops at the first empty result from readline().
    """
    record = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        found = record.match(line)
        if found:
            nodes.append(Node(*found.group(1, 2, 3)))
        line = fp.readline()
    return nodes
144
145
def trim_ignored_letters(s):
    """Return *s* lowercased, without a leading '$' if it has one.

    Dropping the '$' files environment variables under the first letter
    of their name.
    """
    stripped = s[1:] if s.startswith("$") else s
    return stripped.lower()
Fred Drake058068d1998-04-08 23:12:51 +0000153
def get_first_letter(s):
    """Return the character under which the entry text *s* is grouped.

    Text starting with the '<tex2html_percent_mark>' placeholder is
    grouped under '%'; anything else under the first character of
    trim_ignored_letters(s).
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
Fred Drake058068d1998-04-08 23:12:51 +0000159
160
def split_letters(nodes):
    """Group consecutive nodes that share the same first letter.

    Returns a list of (letter, nodes) tuples in input order; a new group
    is started whenever the letter from get_first_letter() changes.
    """
    letter_groups = []
    current = None
    for node in nodes:
        first = get_first_letter(node.text[0])
        if not letter_groups or first != current:
            current = first
            letter_groups.append((current, []))
        letter_groups[-1][1].append(node)
    return letter_groups
177
178
def group_symbols(groups):
    """Merge the leading non-identifier groups into one "Symbols" group.

    *groups* is a list of (letter, nodes) tuples and is modified in place.
    Leading groups whose letter is not an ASCII letter or underscore are
    removed and their nodes collected, in order, into a single
    ("Symbols", nodes) group inserted at the front.  Nothing is inserted
    when there are no such groups.
    """
    entries = []
    ident_letters = string.ascii_letters + "_"
    # Also stop when the list runs out: it may be empty, or may consist
    # of symbol groups only.
    while groups and groups[0][0] not in ident_letters:
        entries += groups[0][1]
        del groups[0]
    if entries:
        groups.insert(0, ("Symbols", entries))
187
188
def split_columns(nodes, columns=1):
    """Distribute *nodes*, in order, over *columns* columns.

    Returns a list of lists.  With columns <= 1 the result is [nodes]
    (the very same list object).  Otherwise exactly *columns* lists are
    returned; the first len(nodes) % columns of them hold one node more
    than the rest, and trailing columns are empty when there are fewer
    nodes than columns.  *nodes* itself is left unmodified.

    Where an entry continues across a column break, the first node of the
    following column gets its ``continuation`` attribute set to the number
    of leading key parts it shares with the last node of the previous
    column.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; the first `extra` columns are one taller.
    base, extra = divmod(len(nodes), columns)
    heights = [base + 1] * extra + [base] * (columns - extra)
    cols = []
    start = 0
    for height in heights:
        cols.append(nodes[start:start + height])
        start += height
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for left, right in zip(cols, cols[1:]):
        if not left or not right:
            # Empty columns only ever trail, so nothing is left to mark.
            break
        prev, following = left[-1], right[0]
        for depth, (a, b) in enumerate(zip(prev.key, following.key), 1):
            if a != b:
                break
            following.continuation = depth
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000234
235
# Indentation emitted once per nesting level of the generated <dl> lists.
DL_LEVEL_INDENT = " "


def format_column(nodes):
    """Render *nodes* as nested <dl> markup and return it as one string.

    Entries sharing leading text parts with the previous entry are nested
    under it; a term is suffixed with " (continued)" when the node's
    ``continuation`` value exceeds that term's index.
    """
    chunks = ["<dl compact='compact'>"]
    emit = chunks.append
    depth = 0
    last_text = []
    for node in nodes:
        text = node.text
        # Count the leading parts shared with the previous entry.
        shared = 0
        for old, new in zip(last_text, text):
            if old != new:
                break
            shared += 1
        if shared > depth:
            emit("<dl compact='compact'>" * (shared - depth) + "\n")
        elif shared < depth:
            emit("\n")
            emit(depth * DL_LEVEL_INDENT)
            emit("</dl>" * (depth - shared))
        depth = shared
        # Open one nested list per intermediate part not yet open.
        for i in range(shared, len(text) - 1):
            depth += 1
            suffix = " (continued)" if node.continuation > i else ""
            emit("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                 % (text[i], suffix, depth * DL_LEVEL_INDENT))
        indent = depth * DL_LEVEL_INDENT
        emit("\n%s<dt>%s%s</a>" % (indent, node.links[0], text[-1]))
        for link in node.links[1:]:
            emit(",\n%s %s[Link]</a>" % (indent, link))
        last_text = text
    emit("\n")
    emit("</dl>" * (depth + 1))
    return null_join(chunks)
Fred Drake7cbf4621998-08-07 19:50:13 +0000276
277
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With one column (or fewer) this is just format_column(nodes).  With
    more, the nodes are divided by split_columns() and each column is
    rendered by format_column() inside its own cell of a one-row table.
    """
    if columns <= 1:
        return format_column(nodes)
    colwidth = 100 // columns
    strings = ['<table width="100%"><tr valign="top">']
    append = strings.append
    for col in split_columns(nodes, columns):
        append('<td width="%d%%">\n' % colwidth)
        append(format_column(col))
        append("\n</td>")
    append("\n</tr></table>")
    return "".join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000296
297
def format_letter(letter):
    """Return the HTML section heading for the group named *letter*.

    The heading carries the id "letter-<letter>"; '.' and '_' get a
    spelled-out label, any other name is shown capitalized.
    """
    special = {'.': ". (dot)", '_': "_ (underscore)"}
    lettername = special.get(letter)
    if lettername is None:
        lettername = letter.capitalize()
    anchor = quoteattr("letter-" + letter)
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" % (anchor, lettername)
Fred Drakeec561091998-03-27 05:25:43 +0000307
308
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Return the index HTML split into per-letter sections.

    The output starts with a navigation bar linking to every section,
    followed by each section's heading (format_letter) and entries
    (format_nodes).  When *group_symbol_nodes* is true, the leading
    non-letter groups are merged by group_symbols() first.
    """
    letter_groups = split_letters(nodes)
    # group_symbols() indexes the first group, so skip it when there is none.
    if group_symbol_nodes and letter_groups:
        group_symbols(letter_groups)
    # Quote the href the same way format_letter() quotes the matching id,
    # so that letters such as '"' or '&' still yield a well-formed attribute.
    items = ["<b><a href=%s>%s</a></b>"
             % (quoteattr("#letter-" + letter), letter)
             for letter, _ in letter_groups]
    s = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return "".join(s)
Fred Drakeec561091998-03-27 05:25:43 +0000322
def format_html(nodes, columns):
    """Return the index HTML for *nodes* without per-letter sections."""
    html = format_nodes(nodes, columns)
    return html
325
Fred Drakeec561091998-03-27 05:25:43 +0000326
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    Destructive: *nodes* is rewritten in place and nothing is returned.
    A node for which cmp_entry() against the most recently kept node
    returns 0 is dropped, after its first link has been appended to that
    kept node's ``links``.
    """
    if len(nodes) < 2:
        return
    # Build the survivor list in one pass instead of deleting from the
    # middle of the list, which costs O(n) per removal.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    nodes[:] = kept
342
343
def dump(nodes, fp):
    """Write the dump() output of every node in *nodes* to *fp*, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
347
348
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse *nodes* in place, then return their HTML.

    A true *letters* selects the per-letter layout of
    format_html_letters(); otherwise format_html() is used.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
356
357
def main():
    """Command-line entry point.

    Options:
      -c N, --columns=N     number of columns in the output (default 1)
      -l, --letters         split the index into per-letter sections
      -o FILE, --output=FILE
                            write the HTML to FILE ("-", the default,
                            means standard output)
      --group-symbols       merge leading non-letter sections (default)
      --dont-group-symbols  keep non-letter sections separate

    Remaining arguments are input files; "-" (the default when none are
    given) means standard input.  A node count is reported on stderr when
    the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # Mirror the "-" convention used for the output file.
            nodes.extend(load(sys.stdin))
        else:
            with open(fn) as fp:
                nodes.extend(load(fp))
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        with open(ofn, "w") as fp:
            fp.write(html)
        print()
        print("%s: %d index nodes" % (program, num_nodes))


if __name__ == "__main__":
    main()