#! /usr/bin/env python

__version__ = '$Revision$'

import os
import re
import string
import sys
9
10
def _three_way(a, b):
    """Return -1, 0 or 1 as a is less than, equal to or greater than b.

    Stand-in for the built-in cmp(), which Python 3 no longer provides.
    """
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
      links -- list of link strings; starts with the one passed to the
               constructor, more may be appended later
      seqno -- sequence number, stored exactly as passed in
      text  -- list of display strings, one per '!'-separated level
      key   -- list of sort keys, parallel to text
    """

    # Matches the <#NNN#> markers that have to be stripped from entries.
    __rmjunk = re.compile(r"<#\d+#>")

    # Number of leading key levels shared with the last entry of the
    # previous column; assigned per instance by split_columns().
    continuation = 0

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich 'less than' so list.sort() also works where __cmp__ is
        not consulted (Python 3)."""
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Walk the levels both entries have; the first difference decides.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # No difference on the shared levels: compare the whole lists.
        return (c
                or _three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one text line: the links joined by \\x01,
        another \\x01, the '!'-joined text, '###' and the sequence number."""
        return "%s\x01%s###%s\n" % ("\x01".join(self.links),
                                    '!'.join(self.text),
                                    self.seqno)
51
52
def cmp_part(s1, s2):
    """Three-way comparison of two strings that mostly ignores case.

    Returns 0 only for identical strings.  Otherwise a string whose
    lowercased form is a proper prefix of the other sorts first, then the
    lowercased forms are compared, and the original spelling is the final
    tie-breaker.  The result is -1, 0 or 1.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    if l1 != l2:
        return (l1 > l2) - (l1 < l2)
    # Same letters, different case: fall back to the exact strings.
    return (s1 > s2) - (s1 < s2)
67
68
def split_entry(str, which):
    """Split an index entry into one string per level.

    Levels are separated by '!'.  Within a level, '@' separates
    alternative fields: when a level has more than one field, the field
    at index `which` is taken; a level without '@' is used as is.
    Returns a list with one string per level.
    """
    stuff = []
    for part in str.split('!'):
        fields = part.split('@')
        if len(fields) != 1:
            stuff.append(fields[which])
        else:
            stuff.append(fields[0])
    return stuff
80
81
# Matches text containing a <tt>...</tt> element (optionally carrying a
# class attribute); the groups are the text before, inside and after it.
_rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
                   re.IGNORECASE)
# Matches an empty pair of parentheses.
_rmparens = re.compile(r"\(\)")
85
def split_entry_key(str):
    """Return the list of sort keys for an index entry, one per level.

    Each level has its <tt> markup removed (levels without such markup
    are lowercased instead), loses any '()' and is finally passed through
    trim_ignored_letters().  A real list is returned so callers can use
    len() and indexing on it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
97
98
def split_entry_text(str):
    """Return the list of display strings for an index entry, one per level.

    If the entry contains a <tt>...</tt> element, the tags are dropped
    (their content is kept) before the entry is split into levels.
    """
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)
105
106
def load(fp):
    """Read index entries from the open file `fp` and return a list of Nodes.

    Each useful line has the form  <link> \\x01 <entry text> ### <seqno>;
    lines that do not match are skipped.  The sequence number is handed
    to Node as the string found in the file.
    """
    nodes = []
    # The separator is the control character \x01, not a backreference.
    rx = re.compile("(.*)\x01(.*)###(.*)$")
    while True:
        line = fp.readline()
        if not line:
            break
        m = rx.match(line)
        if m:
            link, str, seqno = m.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
    return nodes
119
120
def trim_ignored_letters(s):
    """Return `s` lowercased, without a leading '$' if it has one.

    An empty string is returned unchanged instead of raising IndexError.
    """
    # ignore $ to keep environment variables with the
    # leading letter from the name
    s = s.lower()
    if s[:1] == "$":
        return s[1:]
    return s
Fred Drake058068d1998-04-08 23:12:51 +0000129
def get_first_letter(s):
    """Return the lowercased first character of `s`, skipping a leading '$'.

    Raises IndexError when nothing is left after trimming.
    """
    return trim_ignored_letters(s)[0].lower()
132
133
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of (letter, nodes) tuples in input order; a new group
    is started every time the letter changes from one node to the next.
    """
    letter_groups = []
    for node in nodes:
        letter = get_first_letter(node.text[0])
        if not letter_groups or letter_groups[-1][0] != letter:
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups
150
151
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Distribute `nodes` over `columns` lists of nearly equal length.

    With columns <= 1 the result is [nodes] (the very same list object).
    Otherwise `columns` new lists are returned; the leading columns take
    the extra items when the split is uneven and trailing columns may be
    empty.  The input list is left untouched.

    When a column starts with an entry that shares leading key levels
    with the last entry of the previous column, that entry's
    `continuation` attribute is set to the number of shared levels.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    total = len(nodes)
    colheight, numlong = divmod(total, columns)
    if numlong:
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    pos = 0
    # The long columns come first ...
    for _ in range(numlong):
        cols.append(nodes[pos:pos + colheight])
        pos = pos + colheight
    # ... followed by columns that are one item shorter.
    colheight = colheight - 1
    if colheight > 0:
        for _ in range((total - pos) // colheight):
            cols.append(nodes[pos:pos + colheight])
            pos = pos + colheight
    # Fewer items than columns: pad with separate empty lists.
    while len(cols) < columns:
        cols.append([])
    #
    # If items continue across columns, make sure they are marked
    # as continuations so the user knows to look at the previous column.
    #
    for i in range(len(cols) - 1):
        if not cols[i] or not cols[i + 1]:
            return cols
        prev = cols[i][-1]
        following = cols[i + 1][0]
        for j in range(min(len(prev.key), len(following.key))):
            if prev.key[j] != following.key[j]:
                break
            following.continuation = j + 1
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000197
198
# Indentation emitted once per nesting level of the generated <dl> lists.
# NOTE(review): runs of spaces in this literal (and in the "[Link]" format
# string below) may have been collapsed when the file was extracted --
# confirm the intended widths against the upstream source.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Return the HTML for one column of index entries as nested <dl> lists.

    Entries are expected in sorted order: consecutive entries that share
    leading text levels are placed under a common heading.  Headings
    covered by an entry's `continuation` count get " (continued)"
    appended.  An entry's first link wraps its last text level; further
    links are added as "[Link]" anchors.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # Number of leading levels shared with the previous entry.
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # Close the lists the previous entry opened below the
            # shared prefix.
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open a heading and a nested list for every new
        # intermediate level.
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            if node.continuation > i:
                extra = " (continued)"
            else:
                extra = ""
            append("\n<dt>%s%s\n<dd>\n%s<dl compact>"
                   % (term, extra, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    append("</dl>" * (level + 1))
    return ''.join(strings)
239
240
def format_nodes(nodes, columns=1):
    """Return the HTML for `nodes`, laid out in `columns` columns.

    With more than one column the entries are split with split_columns()
    and each part is rendered by format_column() into its own cell of a
    one-row table; otherwise a single format_column() block is produced.
    The markup always ends with a paragraph break.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Whole-number percentage width given to every column.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000260
261
def format_letter(letter):
    """Return the HTML section heading (rule plus named anchor) for `letter`.

    '.' and '_' are spelled out in the visible heading; any other letter
    is shown in upper case.  The anchor name always uses `letter` as given.
    """
    special_names = {'.': ". (dot)", '_': "_ (underscore)"}
    if letter in special_names:
        lettername = special_names[letter]
    else:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
271
272
def format_html_letters(nodes, columns=1):
    """Return the HTML index split into one section per first letter.

    The output starts with a centered navigation bar linking to every
    letter section, followed for each letter by its heading and its
    entries rendered in `columns` columns.
    """
    letter_groups = split_letters(nodes)
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    parts = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        parts.append(format_letter(letter))
        parts.append(format_nodes(group, columns))
    return ''.join(parts)
Fred Drakeec561091998-03-27 05:25:43 +0000284
def format_html(nodes, columns):
    """Return the HTML index for `nodes` without per-letter sections."""
    return format_nodes(nodes, columns)
287
Fred Drakeec561091998-03-27 05:25:43 +0000288
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    A node for which cmp_entry() reports no difference from the last
    kept node is dropped, and its first link is appended to that node's
    links.  The list is updated in place; nothing is returned.
    """
    if len(nodes) < 2:
        return
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if not node.cmp_entry(prev):
            prev.links.append(node.links[0])
        else:
            kept.append(node)
    # Slice assignment keeps the caller's list object while avoiding a
    # separate deletion (and shift of the tail) for every duplicate.
    nodes[:] = kept
304
305
def dump(nodes, fp):
    """Write the dump() text of every node in `nodes` to the file `fp`."""
    for node in nodes:
        fp.write(node.dump())
309
310
def process_nodes(nodes, columns, letters):
    """Sort and collapse `nodes` in place, then return the HTML index.

    When `letters` is true the index is divided into per-letter sections;
    otherwise a single listing is produced.
    """
    nodes.sort()
    collapse(nodes)
    if letters:
        return format_html_letters(nodes, columns)
    else:
        return format_html(nodes, columns)
318
319
def main():
    """Command-line entry point.

    Options:
      -c N, --columns=N   number of columns in the generated index
      -l, --letters       divide the index into per-letter sections
      -o FILE, --output=FILE
                          write the HTML to FILE instead of stdout

    The remaining arguments are input files; "-" (also the default when
    no file is given) reads standard input.  A one-line node count is
    reported on stderr when the HTML goes to stdout, and on stdout
    otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output.
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    num_nodes = len(nodes)
    html = process_nodes(nodes, columns, letters)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        out = open(ofn, "w")
        try:
            out.write(html)
        finally:
            out.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))


if __name__ == "__main__":
    main()