#! /usr/bin/env python

"""Build HTML index markup from index-node dump files.

Input lines carry a link, the entry text and a sequence number (see
load()); the entries are sorted, duplicates are merged, and the result
is written out as nested <dl> lists (see main()).
"""

__version__ = '$Revision$'

import os
import re
import string
import sys


# Bound join methods shared by the whole module: "!" separates the levels
# of an index entry, and the empty string glues output fragments together.
bang_join = "!".join
null_join = "".join
13
14
def _cmp(a, b):
    """Three-way comparison returning -1, 0 or 1 (portable cmp())."""
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
      links        -- list of link strings; starts with the one passed to
                      the constructor (collapse() appends more)
      seqno        -- sequence number exactly as passed in
      text         -- list of display strings, one per "!"-separated level
      key          -- list of sort keys, parallel to ``text``
      continuation -- 0 by default; split_columns() sets it on the first
                      node of a column
    """

    # Matches the "<#123#>" markers that must be stripped from entry text.
    __rmjunk = re.compile(r"<#\d+#>")

    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _cmp(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich-comparison form of __cmp__() so list.sort() keeps working
        on interpreters that ignore __cmp__()."""
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Compare level by level: the key decides first, the display text
        # breaks ties at the same level.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # All shared levels are equal: fall back to whole-list comparison,
        # which also orders a shorter entry before its extensions.
        return c or _cmp(self.key, other.key) or _cmp(self.text, other.text)

    def __repr__(self):
        return "<Node for %s (%s)>" % (bang_join(self.text), self.seqno)

    def __str__(self):
        return bang_join(self.key)

    def dump(self):
        """Return this node as one text line: the links and the "!"-joined
        text separated by "\\1" characters, then "###" and the sequence
        number."""
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  bang_join(self.text),
                  self.seqno)
55
56
def cmp_part(s1, s2):
    """Three-way compare two strings, case-insensitively first.

    Returns 0 only when the strings are identical.  Otherwise a string
    that is a case-insensitive proper prefix of the other sorts first;
    failing that, the lowercased strings are compared, and the exact
    strings are used as the final tie-breaker.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    # (a > b) - (a < b) is the portable spelling of cmp(a, b).
    return ((l1 > l2) - (l1 < l2)) or ((s1 > s2) - (s1 < s2))
71
72
def split_entry(str, which):
    """Split an index entry into one string per "!"-separated level.

    Each level may itself contain "@"-separated alternatives; when it
    does, the alternative at index ``which`` is selected, otherwise the
    level is used as-is.  Returns a list of strings.
    """
    stuff = []
    for part in str.split('!'):
        entry = part.split('@')
        if len(entry) != 1:
            stuff.append(entry[which])
        else:
            stuff.append(entry[0])
    return stuff
84
85
# Matches text containing a <tt>...</tt> element (optionally with a class
# attribute); the groups are the text before, inside and after the element.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches an empty pair of parentheses, "()".
_rmparens = re.compile(r"\(\)")
89
def split_entry_key(str):
    """Return the list of sort keys for an index entry, one per level.

    For each level: <tt> markup is stripped when present (case is kept in
    that case), otherwise the text is lowercased; "()" is removed; finally
    trim_ignored_letters() is applied.  A real list is returned because
    callers take its length and index into it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = null_join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
101
102
def split_entry_text(str):
    """Return the list of display strings for an index entry.

    A <tt>...</tt> element, if one is found in the whole entry, has its
    tags removed before the entry is split into levels by split_entry().
    """
    # Cheap substring test first so the regex only runs on marked-up text.
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = null_join(m.group(1, 2, 3))
    return split_entry(str, 1)
109
110
def load(fp):
    """Read index nodes from the file-like object ``fp``.

    Each line is expected to look like ``link<SOH>text###seqno``, where
    <SOH> is the character with code 1; lines that do not match are
    skipped.  Returns a list of Node objects in input order.
    """
    nodes = []
    # "\x01" is the literal separator character, not a backreference.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    while 1:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, text, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, text, seqno))
    return nodes
123
124
def trim_ignored_letters(s):
    """Return ``s`` lowercased, without a leading "$" if it has one."""
    # ignore $ to keep environment variables with the
    # leading letter from the name
    if s.startswith("$"):
        s = s[1:]
    return s.lower()
Fred Drake058068d1998-04-08 23:12:51 +0000132
def get_first_letter(s):
    """Return the character under which the entry text ``s`` is grouped.

    Text starting with the "<tex2html_percent_mark>" placeholder is filed
    under "%"; otherwise the first character of trim_ignored_letters(s) is
    used.  Raises IndexError when that trimmed text is empty.
    """
    if s.startswith("<tex2html_percent_mark>"):
        return "%"
    return trim_ignored_letters(s)[0]
Fred Drake058068d1998-04-08 23:12:51 +0000138
139
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of ``(letter, [nodes...])`` tuples in input order; a
    new group is started whenever the letter differs from the previous
    node's letter.  An empty input gives an empty list.
    """
    letter_groups = []
    letter = None
    for node in nodes:
        nletter = get_first_letter(node.text[0])
        if not letter_groups or nletter != letter:
            letter = nletter
            group = []
            letter_groups.append((letter, group))
        group.append(node)
    return letter_groups
156
157
def group_symbols(groups):
    """Merge the leading non-identifier groups into one "Symbols" group.

    ``groups`` is a list of ``(letter, nodes)`` tuples and is modified in
    place: every leading group whose letter is not an ASCII letter or "_"
    is removed, and their nodes are re-inserted as a single
    ``("Symbols", nodes)`` group at the front.  Nothing is inserted when
    there are no such groups.
    """
    entries = []
    ident_letters = string.ascii_letters + "_"
    # Guard on ``groups`` itself: the list may be empty, or may consist of
    # symbol groups only and run out while they are being merged.
    while groups and groups[0][0] not in ident_letters:
        entries += groups[0][1]
        del groups[0]
    if entries:
        groups.insert(0, ("Symbols", entries))
166
167
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Split ``nodes`` into ``columns`` lists of nearly equal length.

    With ``columns <= 1`` the input list itself is returned as the only
    column.  Otherwise the longer columns come first, and missing columns
    are padded with empty lists when there are fewer nodes than columns.

    Destructive for ``columns > 1``: the nodes placed in the long columns
    are deleted from ``nodes``.  The first node of every column after the
    first gets its ``continuation`` attribute set to the number of leading
    key levels it shares with the last node of the previous column.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    for i in range(numlong):
        start = i * colheight
        end = start + colheight
        cols.append(nodes[start:end])
    # Drop what has been placed so the short columns can be sliced from 0.
    del nodes[:end]
    colheight = colheight - 1
    try:
        numshort = len(nodes) // colheight
    except ZeroDivisionError:
        # Fewer nodes than columns: pad with empty columns.
        cols = cols + (columns - len(cols)) * [[]]
    else:
        for i in range(numshort):
            start = i * colheight
            end = start + colheight
            cols.append(nodes[start:end])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        try:
            prev = cols[i][-1]
            first = cols[i + 1][0]
        except IndexError:
            # An empty column: nothing further can be a continuation.
            return cols
        else:
            n = min(len(prev.key), len(first.key))
            for j in range(n):
                if prev.key[j] != first.key[j]:
                    break
                first.continuation = j + 1
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000213
214
# Whitespace emitted once per nesting level in the HTML produced by
# format_column().
DL_LEVEL_INDENT = " "
216
def format_column(nodes):
    """Format ``nodes`` as one column of nested <dl compact> lists.

    Consecutive nodes that share leading text levels are placed under the
    same heading; headings flagged through ``node.continuation`` get a
    " (continued)" suffix.  The first link of each node wraps the entry
    text and any further links are emitted as "[Link]" anchors.  Returns
    the HTML as a single string.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # Number of leading levels shared with the previous entry.
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # Close the lists of the levels no longer shared.
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open a heading for every new level above the entry itself.
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            if node.continuation > i:
                extra = " (continued)"
            else:
                extra = ""
            append("\n<dt>%s%s\n<dd>\n%s<dl compact>"
                   % (term, extra, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # Close every list still open, including the outermost one.
    append("</dl>" * (level + 1))
    return null_join(strings)
Fred Drake7cbf4621998-08-07 19:50:13 +0000255
256
def format_nodes(nodes, columns=1):
    """Format ``nodes`` as HTML, optionally laid out in table columns.

    With ``columns > 1`` the nodes are divided by split_columns() and each
    part is rendered by format_column() inside its own table cell of equal
    percentage width; otherwise a single column is produced.  The result
    always ends with a paragraph break.
    """
    strings = []
    append = strings.append
    if columns > 1:
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return null_join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000276
277
def format_letter(letter):
    """Return the HTML section heading and anchor for one letter group.

    "." and "_" get a spelled-out label; any other group name is
    capitalized.  The anchor name is "letter-" followed by the raw
    ``letter`` value.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.capitalize()
    return ("\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n"
            % (letter, lettername))
287
288
def format_html_letters(nodes, columns, group_symbol_nodes):
    """Format ``nodes`` as an index divided into per-letter sections.

    The output starts with a centered navigation bar linking to every
    letter, followed by a heading and formatted node list for each letter
    group.  When ``group_symbol_nodes`` is true, the leading non-identifier
    groups are merged by group_symbols() first.
    """
    letter_groups = split_letters(nodes)
    if group_symbol_nodes:
        group_symbols(letter_groups)
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    parts = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        parts.append(format_letter(letter))
        parts.append(format_nodes(group, columns))
    return null_join(parts)
Fred Drakeec561091998-03-27 05:25:43 +0000302
def format_html(nodes, columns):
    """Format ``nodes`` as a single index without letter sections."""
    return format_nodes(nodes, columns)
305
Fred Drakeec561091998-03-27 05:25:43 +0000306
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    A node whose cmp_entry() against the last kept node is zero is
    dropped, and its first link is appended to that kept node's links.
    Destructive: ``nodes`` is updated in place; nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the surviving list in one pass instead of deleting from the
    # middle of ``nodes`` repeatedly.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    nodes[:] = kept
322
323
def dump(nodes, fp):
    """Write the dump() line of every node in ``nodes`` to ``fp``."""
    write = fp.write
    for node in nodes:
        write(node.dump())
327
328
def process_nodes(nodes, columns, letters=0, group_symbol_nodes=0):
    """Sort and collapse ``nodes`` in place, then return them as HTML.

    ``letters`` selects the per-letter layout produced by
    format_html_letters(), to which ``group_symbol_nodes`` is passed on;
    otherwise the plain layout of format_html() is used.
    """
    nodes.sort()
    collapse(nodes)
    if letters:
        return format_html_letters(nodes, columns, group_symbol_nodes)
    return format_html(nodes, columns)
336
337
def main():
    """Command-line entry point.

    Options:
      -o FILE, --output=FILE   write the HTML to FILE ("-", the default,
                               means standard output)
      -c N, --columns=N        number of columns (default 1)
      -l, --letters            divide the index into per-letter sections
      --group-symbols          merge leading symbol groups (default)
      --dont-group-symbols     keep one group per symbol

    Remaining arguments are input files; "-" (the default when none are
    given) reads standard input.  A summary line with the number of nodes
    read (counted before duplicates are collapsed) goes to standard error
    when the HTML goes to standard output, and to standard output
    otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    group_symbol_nodes = 1
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "dont-group-symbols",
                                "group-symbols", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val, 10)
        elif opt in ("-l", "--letters"):
            letters = 1
        elif opt == "--group-symbols":
            group_symbol_nodes = 1
        elif opt == "--dont-group-symbols":
            group_symbol_nodes = 0
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" names standard input, mirroring "-" for the output.
            nodes.extend(load(sys.stdin))
        else:
            fp = open(fn)
            try:
                nodes.extend(load(fp))
            finally:
                fp.close()
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters, group_symbol_nodes)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        # Keep the summary out of the HTML stream.
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            fp.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
Fred Drakeec561091998-03-27 05:25:43 +0000374
375
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()