blob: a07ed2faac714e42744146e447a14bceed3725ef [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
Fred Drakeec561091998-03-27 05:25:43 +00003__version__ = '$Revision$'
4
Fred Drake071972e2002-10-16 15:30:17 +00005import os.path
Fred Drakeec561091998-03-27 05:25:43 +00006import re
7import string
8import sys
9
Fred Drake39724572003-11-25 16:21:00 +000010from xml.sax.saxutils import quoteattr
11
Fred Drakeec561091998-03-27 05:25:43 +000012
# Bound-method shortcuts for the two joins used throughout this module.
bang_join = "!".join
null_join = "".join


class Node:
    """A single index entry read from the index data file.

    Attributes:
        links: list of link strings for this entry; starts with the one
            passed to the constructor.
        seqno: sequence number exactly as given by the caller; used only
            as the final tie-breaker when sorting.
        text: entry components produced by split_entry_text().
        key: sort-key components produced by split_entry_key().
    """

    # Raw string so that \d reaches the regex engine as a digit class
    # instead of depending on an unrecognised string escape.
    __rmjunk = re.compile(r"<#\d+#>")

    # Number of leading key components shared with the last entry of the
    # previous column; assigned by split_columns(), read by format_column().
    continuation = 0

    def __init__(self, link, str, seqno):
        """Build a node from a link, the raw entry string and a sequence
        number."""
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or cmp(self.seqno, other.seqno)

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number.

        Components are compared pairwise (key first, then text) over the
        common length; if all of those match, the complete key and text
        lists decide the result.
        """
        c = 0
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        return c or cmp(self.key, other.key) or cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return the node as one text line: the links and the '!'-joined
        text separated by \\1 characters, then '###' and the sequence
        number."""
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  bang_join(self.text),
                  self.seqno)
57
58
def cmp_part(s1, s2):
    """Compare two entry components, returning -1, 0 or 1.

    Equal strings compare equal.  Otherwise a string that is a
    case-insensitive proper prefix of the other sorts first; failing that,
    the lower-cased strings are compared, and the original strings break
    any remaining tie.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    if l1 != l2:
        # Case-insensitive ordering takes precedence.
        if l1 < l2:
            return -1
        return 1
    # Same letters, different case: fall back to the exact strings.
    if s1 < s2:
        return -1
    return 1
73
74
def split_entry(str, which):
    """Split an index entry into its '!'-separated components.

    A component containing '@' is split on it and the piece at index
    `which` is kept; a component without '@' is kept whole.
    """
    selected = []
    for component in str.split('!'):
        pieces = component.split('@')
        if len(pieces) == 1:
            selected.append(pieces[0])
        else:
            selected.append(pieces[which])
    return selected
86
87
# Matches a string containing a <tt>...</tt> element (optionally carrying a
# class attribute), case-insensitively; groups 1, 2 and 3 are the text
# before, inside and after the element.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches a literal "()" pair so it can be removed from sort keys.
_rmparens = re.compile(r"\(\)")
91
def split_entry_key(str):
    """Return the list of sort-key components for an index entry.

    Each component has its <tt> markup (if any) stripped, any "()" removed,
    and is then normalised by trim_ignored_letters().
    """
    keys = []
    for part in split_entry(str, 1):
        tt = _rmtt.match(part)
        if tt is None:
            part = part.lower()
        else:
            part = null_join(tt.group(1, 2, 3))
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
103
104
def split_entry_text(str):
    """Return the list of text components for an index entry.

    If the whole entry matches the <tt> pattern, the markup is dropped
    before the entry is split.
    """
    tt = None
    if '<' in str:
        # Only bother with the regex when markup can be present at all.
        tt = _rmtt.match(str)
    if tt:
        str = null_join(tt.group(1, 2, 3))
    return split_entry(str, 1)
111
112
def load(fp):
    """Read index entries from the open file `fp` and return a list of Nodes.

    Each recognised line has the form  link \\1 entry ### seqno ; lines
    that do not match are skipped.
    """
    # NOTE: "\1" is deliberately NOT in a raw string -- it is the literal
    # control character used as field separator, not a backreference.
    entry_rx = re.compile("(.*)\1(.*)###(.*)$")
    loaded = []
    line = fp.readline()
    while line:
        found = entry_rx.match(line)
        if found:
            link, str, seqno = found.group(1, 2, 3)
            loaded.append(Node(link, str, seqno))
        line = fp.readline()
    return loaded
125
126
def trim_ignored_letters(s):
    """Return `s` lower-cased, without one leading "$" if present."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
Fred Drake058068d1998-04-08 23:12:51 +0000134
def get_first_letter(s):
    """Return the character under which the entry text `s` is grouped.

    Text starting with the <tex2html_percent_mark> placeholder is filed
    under "%"; otherwise the first character of the trimmed, lower-cased
    text is used.
    """
    if not s.startswith("<tex2html_percent_mark>"):
        return trim_ignored_letters(s)[0]
    return "%"
Fred Drake058068d1998-04-08 23:12:51 +0000140
141
def split_letters(nodes):
    """Partition `nodes` into runs that share the same first letter.

    Returns a list of (letter, nodes) tuples in input order; a new tuple is
    started every time the first letter changes from one node to the next.
    """
    letter_groups = []
    for node in nodes:
        first = get_first_letter(node.text[0])
        if not letter_groups or letter_groups[-1][0] != first:
            letter_groups.append((first, []))
        letter_groups[-1][1].append(node)
    return letter_groups
158
159
def group_symbols(groups):
    """Merge the leading non-identifier letter groups into one "Symbols"
    group.  Modifies `groups` in place.

    `groups` is a list of (letter, nodes) tuples.  Leading groups whose
    letter is not an ASCII letter or "_" are removed and their nodes are
    collected, in order, into a single ("Symbols", nodes) tuple inserted at
    the front.  Nothing is inserted when there are no such groups.
    """
    entries = []
    ident_letters = string.ascii_letters + "_"
    # Check that a group remains before peeking at it: the list may be
    # empty, or may consist of symbol groups only.
    while groups and groups[0][0] not in ident_letters:
        entries += groups[0][1]
        del groups[0]
    if entries:
        groups.insert(0, ("Symbols", entries))
168
169
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Distribute `nodes` over `columns` columns and return the list of
    columns (each a list of nodes).

    With `columns` <= 1 the result is simply [nodes].  Otherwise the nodes
    are dealt out in order; when they do not divide evenly the first
    columns are one entry taller than the rest.  The input list is left
    untouched.

    When the first entry of a column shares leading key components with
    the last entry of the preceding column, its `continuation` attribute is
    set to the number of shared components.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    total = len(nodes)
    colheight = total // columns
    numlong = total % columns
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    for i in range(numlong):
        start = i * colheight
        cols.append(nodes[start:start + colheight])
    # Whatever the tall columns did not take goes into shorter columns.
    remaining = nodes[numlong * colheight:]
    colheight = colheight - 1
    if colheight == 0:
        # No entries are left for the short columns; pad with empty ones.
        cols = cols + [[] for i in range(columns - len(cols))]
    else:
        for i in range(len(remaining) // colheight):
            start = i * colheight
            cols.append(remaining[start:start + colheight])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        if not cols[i] or not cols[i + 1]:
            # An empty column: nothing further can be a continuation.
            return cols
        last = cols[i][-1]
        first = cols[i + 1][0]
        for j in range(min(len(last.key), len(first.key))):
            if last.key[j] != first.key[j]:
                break
            first.continuation = j + 1
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000215
216
# Indentation string emitted once per nesting level of the generated lists.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Render one column of index nodes as nested HTML <dl> lists.

    Consecutive nodes that share leading text components are nested under a
    common <dt>; a parent term is followed by " (continued)" when the
    node's `continuation` attribute exceeds that term's depth.  The first
    link of a node wraps the entry text, further links are appended as
    "[Link]" items.
    """
    open_list = "<dl compact='compact'>"
    out = [open_list]
    emit = out.append
    depth = 0
    last_text = []
    for node in nodes:
        text = node.text
        # Count the leading components shared with the preceding entry.
        shared = 0
        for old, new in zip(last_text, text):
            if old != new:
                break
            shared = shared + 1
        if shared > depth:
            emit(open_list * (shared - depth) + "\n")
        elif shared < depth:
            emit("\n")
            emit(depth * DL_LEVEL_INDENT)
            emit("</dl>" * (depth - shared))
        depth = shared
        # Open a nested list for every parent component not yet open.
        for i in range(shared, len(text) - 1):
            depth = depth + 1
            if node.continuation > i:
                suffix = " (continued)"
            else:
                suffix = ""
            emit("\n<dt>%s%s\n<dd>\n%s<dl compact='compact'>"
                 % (text[i], suffix, depth * DL_LEVEL_INDENT))
        emit("\n%s<dt>%s%s</a>"
             % (depth * DL_LEVEL_INDENT, node.links[0], text[-1]))
        for link in node.links[1:]:
            emit(",\n%s %s[Link]</a>" % (depth * DL_LEVEL_INDENT, link))
        last_text = text
    emit("\n")
    # Close every list still open plus the outermost one.
    emit("</dl>" * (depth + 1))
    return "".join(out)
Fred Drake7cbf4621998-08-07 19:50:13 +0000257
258
def format_nodes(nodes, columns=1):
    """Render `nodes` as HTML, optionally laid out in several columns.

    With `columns` <= 1 the result is a single formatted column.  Otherwise
    the nodes are split with split_columns() and each column is placed in
    its own cell of a one-row table; every cell gets an equal percentage
    width.
    """
    if columns <= 1:
        return format_column(nodes)
    colwidth = 100 // columns
    strings = ['<table width="100%"><tr valign="top">']
    append = strings.append
    for col in split_columns(nodes, columns):
        append('<td width="%d%%">\n' % colwidth)
        append(format_column(col))
        append("\n</td>")
    append("\n</tr></table>")
    return null_join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000277
278
def format_letter(letter):
    """Return the HTML section heading for the index group `letter`.

    The heading carries the id "letter-<letter>".  "." and "_" get a
    spelled-out label; any other value is shown capitalized.
    """
    special = {'.': ". (dot)", '_': "_ (underscore)"}
    if letter in special:
        lettername = special[letter]
    else:
        lettername = letter.capitalize()
    anchor = quoteattr("letter-" + letter)
    return "\n<hr />\n<h2 id=%s>%s</h2>\n\n" % (anchor, lettername)
Fred Drakeec561091998-03-27 05:25:43 +0000288
289
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Render `nodes` as an HTML index divided into per-letter sections.

    The output starts with a centered bar of links to every section and is
    followed, for each letter group, by its heading and its formatted
    nodes.  When `group_symbol_nodes` is true, group_symbols() is applied
    to the letter groups first.
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    jump_links = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
                  for letter, group in letter_groups]
    pieces = ["<hr /><center>\n%s</center>\n" % " |\n".join(jump_links)]
    for letter, group in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(group, columns))
    return null_join(pieces)
Fred Drakeec561091998-03-27 05:25:43 +0000303
def format_html(nodes, columns):
    """Render `nodes` as one HTML index without per-letter sections."""
    html = format_nodes(nodes, columns)
    return html
306
Fred Drakeec561091998-03-27 05:25:43 +0000307
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node whose cmp_entry() against the most recently kept node is zero is
    dropped from `nodes`, and its first link is appended to that kept
    node's links.  The list is rewritten in place in a single pass.
    """
    if len(nodes) < 2:
        return
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    # Slice assignment keeps the caller's list object, as before.
    nodes[:] = kept
323
324
def dump(nodes, fp):
    """Write the dump() line of every node in `nodes` to `fp`, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
328
329
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse `nodes` in place, then return them as HTML.

    When `letters` is true the output is divided into per-letter sections
    (with `group_symbol_nodes` passed through); otherwise a single index is
    produced.
    """
    nodes.sort()
    collapse(nodes)
    if not letters:
        return format_html(nodes, columns)
    return format_html_letters(nodes, columns, group_symbol_nodes)
337
338
def main():
    """Command-line entry point.

    Options:
        -c N, --columns=N       number of columns in the output
        -l, --letters           divide the index into per-letter sections
        -o FILE, --output=FILE  write the HTML to FILE ("-" is stdout)
        --group-symbols         merge leading symbol groups (default)
        --dont-group-symbols    keep a section per symbol

    Remaining arguments are input files; "-" (also the default when no
    file is given) reads standard input.  A node count is reported on
    stderr when the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" names standard input, mirroring "-" for the output.
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    # Count before processing: collapsing removes merged nodes.
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        # A blank line followed by the summary line.
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))


if __name__ == "__main__":
    main()