blob: 5870462cd5489f0cbb9e012e343adb37c9ce54e2 [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
Fred Drakeec561091998-03-27 05:25:43 +00003__version__ = '$Revision$'
4
Fred Drake071972e2002-10-16 15:30:17 +00005import os.path
Fred Drakeec561091998-03-27 05:25:43 +00006import re
7import string
8import sys
9
Fred Drake39724572003-11-25 16:21:00 +000010from xml.sax.saxutils import quoteattr
11
Fred Drakeec561091998-03-27 05:25:43 +000012
# Bound join methods used throughout this module: entry parts are joined
# with "!" and output fragments are concatenated with no separator.
bang_join = "!".join
null_join = "".join


def _text_macro(name, text):
    """Return a (compiled pattern, replacement) pair for one simple macro.

    The pattern matches a backslash followed by *name* at a word boundary.
    """
    return re.compile(r"\\%s\b" % name), text


# (pattern, replacement) pairs applied in order to every raw entry string
# by Node.__init__.
REPLACEMENTS = [
    # Hackish way to deal with macros replaced with simple text
    _text_macro("ABC", "ABC"),
    _text_macro("ASCII", "ASCII"),
    _text_macro("Cpp", "C++"),
    _text_macro("EOF", "EOF"),
    _text_macro("NULL", "NULL"),
    _text_macro("POSIX", "POSIX"),
    _text_macro("UNIX", "Unix"),
    # deal with turds left over from LaTeX2HTML
    (re.compile(r"<#\d+#>"), ""),
    ]
Fred Drake3d422662001-12-26 19:55:14 +000028
class Node:
    """A single index entry.

    Attributes:
        links -- list of link strings pointing at this entry
        seqno -- sequence number, stored exactly as passed in
        text  -- list of display strings, one per entry level
        key   -- list of sort keys, one per entry level
    """

    # Set by split_columns() on the first node of a column when it shares
    # leading key parts with the last node of the previous column; read by
    # format_column() to decide where to add " (continued)".
    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # Expand the simple text macros before splitting the entry.
        cleaned = str
        for pattern, replacement in REPLACEMENTS:
            cleaned = pattern.sub(replacement, cleaned)
        # build up the text
        self.text = split_entry_text(cleaned)
        self.key = split_entry_key(cleaned)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        order = self.cmp_entry(other)
        if order:
            return order
        # NOTE(review): load() passes seqno as the string matched from the
        # data file, so this is a textual comparison ("10" < "9") -- confirm
        # whether numeric ordering was intended.
        return cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        depth = min(len(self.key), len(other.key))
        for i in range(depth):
            # Keys decide first; the display text only breaks key ties.
            c = cmp_part(self.key[i], other.key[i])
            if not c:
                c = cmp_part(self.text[i], other.text[i])
            if c:
                return c
        # All shared levels compare equal: fall back to whole-list order.
        return cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        joined = bang_join(self.text)
        return "<Node for %s (%s)>" % (joined, self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return this node as one data-file line, in the format that
        load() parses."""
        links = "\1".join(self.links)
        text = bang_join(self.text)
        return "%s\1%s###%s\n" % (links, text, self.seqno)
67
68
def _compare(a, b):
    """Three-way comparison returning -1, 0 or 1 (like Python 2's cmp())."""
    return (a > b) - (a < b)


def cmp_part(s1, s2):
    """Compare two entry parts, ignoring case where possible.

    Returns 0 when the strings are identical.  Otherwise a string whose
    lower-cased form is a proper prefix of the other's sorts first; failing
    that, the lower-cased strings are compared, and the original strings
    are used only to break a case-insensitive tie.

    Returns a negative, zero or positive integer.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    return _compare(l1, l2) or _compare(s1, s2)
83
84
def split_entry(str, which):
    """Split an index entry into its per-level components.

    Levels are separated by '!'.  Within a level, '@' separates
    alternatives; *which* selects the alternative to keep.  A level
    without '@' is used as-is.  Returns a list with one string per level.
    """
    stuff = []
    for level in str.split('!'):
        fields = level.split('@')
        if len(fields) == 1:
            stuff.append(fields[0])
        else:
            stuff.append(fields[which])
    return stuff
96
97
# Matches text containing a <tt>...</tt> element (optionally with a class
# attribute); groups 1-3 are the text before, inside and after the element.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches an empty pair of parentheses.
_rmparens = re.compile(r"\(\)")

def split_entry_key(str):
    """Return the list of sort keys for the entry *str*, one per level.

    For each level, <tt> markup is stripped (or the text lower-cased when
    there is none), '()' is removed, and the result is passed through
    trim_ignored_letters().
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
113
114
def split_entry_text(str):
    """Return the list of display strings for the entry *str*, one per
    level, after removing a <tt>...</tt> wrapper matched by _rmtt."""
    m = None
    # Only try the regular expression when the entry can contain markup.
    if '<' in str:
        m = _rmtt.match(str)
    if m:
        str = null_join(m.group(1, 2, 3))
    return split_entry(str, 1)
121
122
def load(fp):
    """Read index entries from the open file *fp* and return them as a
    list of Node objects.

    Each line is expected to have the form written by Node.dump():
    link, a \\1 character, the entry text, '###' and the sequence number.
    Lines that do not match are skipped.
    """
    entry_rx = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        m = entry_rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
        line = fp.readline()
    return nodes
135
136
def trim_ignored_letters(s):
    """Return *s* lower-cased, without a leading '$' if it has one."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
Fred Drake058068d1998-04-08 23:12:51 +0000144
def get_first_letter(s):
    """Return the character under which the entry text *s* is grouped.

    Text starting with the "<tex2html_percent_mark>" placeholder is filed
    under "%"; otherwise the first character of trim_ignored_letters(s)
    is used.
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
Fred Drake058068d1998-04-08 23:12:51 +0000150
151
def split_letters(nodes):
    """Partition *nodes* into runs that share the same first letter.

    Returns a list of (letter, nodes) tuples in input order; a new group
    is started whenever the first letter differs from the previous node's.
    """
    letter_groups = []
    for node in nodes:
        letter = get_first_letter(node.text[0])
        # Open a new group for the very first node and on every change.
        if not letter_groups or letter_groups[-1][0] != letter:
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups
168
169
def group_symbols(groups):
    """Merge the leading non-identifier letter groups into one group.

    *groups* is a list of (letter, nodes) tuples and is modified in place:
    every leading group whose letter is not an ASCII letter or underscore
    is removed, and their nodes are combined into a single ("Symbols",
    nodes) group inserted at the front.

    An empty list, or a list consisting only of symbol groups, is handled
    without raising IndexError.
    """
    ident_letters = string.ascii_letters + "_"
    entries = []
    leading = 0
    for letter, nodes in groups:
        if letter in ident_letters:
            break
        entries += nodes
        leading = leading + 1
    del groups[:leading]
    if entries:
        groups.insert(0, ("Symbols", entries))
178
179
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Split *nodes* into *columns* consecutive slices, one per column.

    When the nodes do not divide evenly, the leading columns are one item
    taller than the rest.  With columns <= 1 the result is [nodes] (the
    same list object).  Otherwise a list of new lists is returned and the
    caller's *nodes* list is left unmodified.

    Side effect: when the first node of a column shares leading key parts
    with the last node of the previous column, its `continuation`
    attribute is set to the number of shared parts.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    pos = 0  # index of the first node not yet placed in a column
    for i in range(numlong):
        cols.append(nodes[pos:pos + colheight])
        pos = pos + colheight
    # The remaining columns are one item shorter.
    colheight = colheight - 1
    if colheight == 0:
        # Nothing left to distribute: pad with empty columns.
        for i in range(columns - len(cols)):
            cols.append([])
    elif colheight > 0:
        numshort = (len(nodes) - pos) // colheight
        for i in range(numshort):
            cols.append(nodes[pos:pos + colheight])
            pos = pos + colheight
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        if not cols[i] or not cols[i + 1]:
            # An empty column ends the scan.
            break
        prev = cols[i][-1]
        following = cols[i + 1][0]
        shared = 0
        for prev_part, next_part in zip(prev.key, following.key):
            if prev_part != next_part:
                break
            shared = shared + 1
        if shared:
            following.continuation = shared
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000225
226
# Indentation emitted once per nesting level in the generated HTML.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Render *nodes* as nested HTML <dl> lists and return the markup.

    Consecutive nodes that share leading text parts are placed under the
    same parent term.  Each node's first link wraps its final text part;
    any additional links are appended as "[Link]" anchors.  Parent terms
    covered by a node's `continuation` count get " (continued)" appended.
    """
    out = ["<dl compact='compact'>"]
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # Count the leading parts shared with the previous entry.
        count = 0
        for before, now in zip(previous, current):
            if before != now:
                break
            count = count + 1
        if count > level:
            out.append("<dl compact='compact'>" * (count - level) + "\n")
        elif level > count:
            # Close the lists that the previous entry left open.
            out.append("\n")
            out.append(level * DL_LEVEL_INDENT)
            out.append("</dl>" * (level - count))
        level = count
        # Open a nested list for every parent term not already open.
        for i in range(count, len(current) - 1):
            level = level + 1
            extra = ""
            if node.continuation > i:
                extra = " (continued)"
            out.append("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                       % (current[i], extra, level * DL_LEVEL_INDENT))
        indent = level * DL_LEVEL_INDENT
        out.append("\n%s<dt>%s%s</a>" % (indent, node.links[0], current[-1]))
        for link in node.links[1:]:
            out.append(",\n%s %s[Link]</a>" % (indent, link))
        previous = current
    out.append("\n")
    out.append("</dl>" * (level + 1))
    return "".join(out)
Fred Drake7cbf4621998-08-07 19:50:13 +0000267
268
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With a single column the result is just format_column(nodes).  With
    more, the nodes are divided by split_columns() and each column is
    rendered into its own cell of a one-row table, with the cells sharing
    the width equally.
    """
    if columns <= 1:
        return format_column(nodes)
    colwidth = int(100 / columns)
    strings = ['<table width="100%"><tr valign="top">']
    for col in split_columns(nodes, columns):
        strings.append('<td width="%d%%">\n' % colwidth)
        strings.append(format_column(col))
        strings.append("\n</td>")
    strings.append("\n</tr></table>")
    return null_join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000287
288
def format_letter(letter):
    """Return the HTML section heading for the group named *letter*.

    The heading carries the id "letter-<letter>" so the navigation links
    can point at it; '.' and '_' get a spelled-out title, anything else is
    capitalized.
    """
    special_names = {
        '.': ". (dot)",
        '_': "_ (underscore)",
        }
    if letter in special_names:
        lettername = special_names[letter]
    else:
        lettername = letter.capitalize()
    anchor = quoteattr("letter-" + letter)
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" % (anchor, lettername)
Fred Drakeec561091998-03-27 05:25:43 +0000298
299
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Return the index HTML with one section per first letter.

    The output starts with a navigation bar linking to every section,
    followed by each section's heading (format_letter) and entries
    (format_nodes).  When *group_symbol_nodes* is true, the leading
    non-identifier groups are merged by group_symbols() first.
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    items = []
    for letter, group in letter_groups:
        # Quote the target the same way format_letter() quotes the id, so
        # letters such as '"' or '&' cannot break the attribute.
        href = quoteattr("#letter-" + letter)
        items.append("<b><a href=%s>%s</a></b>" % (href, letter))
    sections = ["<hr /><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        sections.append(format_letter(letter))
        sections.append(format_nodes(group, columns))
    return null_join(sections)
Fred Drakeec561091998-03-27 05:25:43 +0000313
def format_html(nodes, columns):
    """Return the index HTML for *nodes* without per-letter sections;
    this simply delegates to format_nodes()."""
    html = format_nodes(nodes, columns)
    return html
316
Fred Drakeec561091998-03-27 05:25:43 +0000317
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node whose cmp_entry() against the most recently kept node returns
    a false value is dropped from *nodes* and its first link is appended
    to the kept node's links.  The list is rewritten in place in a single
    pass; nothing is returned.
    """
    if len(nodes) < 2:
        return
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    # Replace the contents rather than rebinding, so callers see the change.
    nodes[:] = kept
332 prev = node
333
334
def dump(nodes, fp):
    """Write the dump() representation of every node in *nodes* to the
    file object *fp*, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
338
339
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse *nodes* in place, then return the index HTML.

    When *letters* is true the output is divided into per-letter sections
    (see format_html_letters); otherwise it is one continuous listing.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
347
348
def main():
    """Command-line driver: read index data files and write the HTML index.

    Options:
        -o FILE, --output=FILE   write the HTML to FILE ("-" is stdout)
        -c N, --columns=N        number of columns to lay the index out in
        -l, --letters            divide the index into per-letter sections
        --group-symbols          merge leading symbol groups (the default)
        --dont-group-symbols     keep one section per symbol

    The remaining arguments are input files; "-" (the default when none
    are given) reads standard input.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output file.
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    # Count before processing: collapse() merges duplicate entries.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        # Keep the summary out of the HTML stream.
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
Fred Drakeec561091998-03-27 05:25:43 +0000385
386
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()