blob: e282f3cc61f2050a0ff04cd1ad225a92374498eb [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
Fred Drakeec561091998-03-27 05:25:43 +00003__version__ = '$Revision$'
4
Fred Drake03a02351998-12-28 20:46:53 +00005import os
Fred Drakeec561091998-03-27 05:25:43 +00006import re
7import string
8import sys
9
10
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* is less than, equal to or greater than *b*."""
    return (a > b) - (a < b)


class Node:
    """A single index entry.

    Attributes:
        links: list of link strings for this entry; starts with the one
            link passed to the constructor.
        seqno: the sequence number exactly as passed in (not converted).
        text:  entry parts as returned by split_entry_text().
        key:   entry parts as returned by split_entry_key().
    """

    # Matches the "<#NNN#>" markers left in the text by LaTeX2HTML.
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich-comparison counterpart of __cmp__().

        list.sort() only needs "<"; defining it keeps nodes sortable on
        interpreters that ignore __cmp__.
        """
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        # Compare part by part over the common prefix of the two keys; the
        # first part that differs (by key, then by text) decides.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                return c
        # All shared parts are equal: fall back to comparing the full lists.
        return (_three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one text line.

        The links and the '!'-joined text are separated by the control
        character "\\1", followed by "###" and the sequence number.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
50
51
def cmp_part(s1, s2):
    """Three-way comparison of two entry parts.

    The parts are ordered case-insensitively first; the exact (case
    sensitive) spelling only breaks ties between parts that are equal once
    lower-cased.  A part that is a prefix of another sorts before it, which
    plain string ordering already guarantees.

    Returns -1, 0 or 1.
    """
    if s1 == s2:
        return 0
    folded1 = s1.lower()
    folded2 = s2.lower()
    if folded1 != folded2:
        return (folded1 > folded2) - (folded1 < folded2)
    # Same letters, different case: fall back to the exact strings.
    return (s1 > s2) - (s1 < s2)
66
67
def split_entry(str, which):
    """Split an index entry into its parts.

    The entry is split on '!'; each part may have the form "a@b".  For
    parts that contain '@', the field at index *which* is selected; parts
    without '@' are used unchanged.

    Returns the list of selected strings, one per '!'-separated part.
    """
    stuff = []
    for part in str.split('!'):
        fields = part.split('@')
        if len(fields) != 1:
            stuff.append(fields[which])
        else:
            stuff.append(fields[0])
    return stuff
79
80
# Matches text containing <tt>...</tt> markup (case-insensitively).  The
# three groups capture the text before, inside and after the markup, so
# callers can join them to drop the tags while keeping the contents.  The
# leading group is greedy, so with several <tt> tags the last one is matched.
_rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
# Matches a literal "()" pair.
_rmparens = re.compile(r"\(\)")
83
def split_entry_key(str):
    """Return the list of sort-key parts for an index entry.

    Each part selected by split_entry(str, 1) has one <tt>...</tt> wrapper
    removed (contents kept) or, if it has none, is lower-cased; then every
    "()" is removed and the part is passed through trim_ignored_letters().

    Always returns a real list so callers can take its length and index it.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
95
96
def split_entry_text(str):
    """Return the list of display-text parts for an index entry.

    If the entry as a whole matches the <tt>...</tt> pattern, that one pair
    of tags is dropped (contents kept) before the entry is split with
    split_entry(str, 1).
    """
    # Cheap substring test first; the regex is only tried when a tag is
    # possible at all.
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)
103
104
def load(fp):
    """Read index entries from the file-like object *fp*.

    Each line is expected to look like "<link>\\1<text>###<seqno>"; lines
    that do not match are skipped.  Returns a list with one Node per
    matching line, in file order.
    """
    # "\1" in this non-raw string is the control character chr(1) -- the
    # separator Node.dump() writes -- not a regex backreference.
    pattern = re.compile("(.*)\1(.*)###(.*)$")
    nodes = []
    line = fp.readline()
    while line:
        found = pattern.match(line)
        if found:
            # NOTE(review): seqno stays a string here, so nodes that tie on
            # their entry are ordered by string comparison -- confirm that
            # is intended.
            link, str, seqno = found.group(1, 2, 3)
            nodes.append(Node(link, str, seqno))
        line = fp.readline()
    return nodes
117
118
# Leading characters that are skipped when deriving a sort key, so that
# e.g. an environment variable written with a leading "$" is filed under
# the first letter of its name.
SKIP_LETTERS = "$"

def trim_ignored_letters(s):
    """Return *s* lower-cased, with leading SKIP_LETTERS characters removed.

    An empty string, or one made up only of skipped characters, yields ''.
    """
    return s.lower().lstrip(SKIP_LETTERS)
127
def get_first_letter(s):
    """Return the lower-cased first character of *s* after trimming.

    Leading ignored characters are removed with trim_ignored_letters().
    If nothing is left, '' is returned.
    """
    # Slicing (rather than indexing) keeps an empty result from raising.
    return trim_ignored_letters(s)[:1].lower()
130
131
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their first text part.

    Returns a list of (letter, nodes) tuples in input order.  A new group
    is started whenever the letter differs from the previous node's, so
    the input is expected to be sorted already.
    """
    letter_groups = []
    for node in nodes:
        letter = get_first_letter(node.text[0])
        if not letter_groups or letter_groups[-1][0] != letter:
            letter_groups.append((letter, []))
        letter_groups[-1][1].append(node)
    return letter_groups
148
149
def split_columns(nodes, columns=1):
    """Distribute *nodes* over *columns* consecutive slices.

    The columns are filled in order; when the nodes do not divide evenly,
    the leading columns each receive one extra node.  If there are fewer
    nodes than columns, the trailing columns are empty lists.

    With columns <= 1 the result is a one-element list holding *nodes*
    itself.  The input list is never modified.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    cols = []
    start = 0
    for i in range(columns):
        height = colheight
        if i < numlong:
            # The first `numlong` columns absorb the remainder.
            height = height + 1
        cols.append(nodes[start:start + height])
        start = start + height
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000179
180
# Indentation emitted once per nesting level in the generated HTML.
DL_LEVEL_INDENT = " "

def format_column(nodes):
    """Render *nodes* as nested "<dl compact>" lists and return the HTML.

    Each node's text parts form a path: every part but the last becomes a
    heading with a nested list, and the last part is emitted after the
    node's first link and followed by "</a>".  Further links are appended
    as "[Link]" items.  Leading parts shared with the previous node are not
    repeated; the nesting level is adjusted instead.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # Count the leading parts this entry shares with the previous one.
        count = 0
        for old, new in zip(previous, current):
            if old != new:
                break
            count = count + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # Close the lists opened for parts that are no longer shared.
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open a heading and a nested list for each new intermediate part.
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            append("\n<dt>%s\n<dd>\n%s<dl compact>"
                   % (term, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # Close every list still open, plus the outermost one.
    append("</dl>" * (level + 1))
    return ''.join(strings)
217
218
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, optionally laid out in columns.

    With columns > 1 the nodes are divided by split_columns() and each
    part is rendered by format_column() inside its own cell of a
    one-row table; otherwise a single format_column() result is used.
    The output always ends with a "<p>" separator.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Equal percentage width per column, rounded down.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000238
239
def format_letter(letter):
    """Return the HTML heading that starts the section for *letter*.

    The heading carries a "letter-<letter>" anchor.  '.' and '_' are given
    a spelled-out name; any other letter is shown upper-cased.
    """
    special_names = {'.': ". (dot)", '_': "_ (underscore)"}
    lettername = special_names.get(letter)
    if lettername is None:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
249
250
def format_html_letters(nodes, columns=1):
    """Return the index HTML split into one section per first letter.

    The output starts with a centered row of links to the per-letter
    anchors, followed by a heading (format_letter) and the formatted
    entries (format_nodes) for each group from split_letters().
    """
    letter_groups = split_letters(nodes)
    # Navigation bar: one link per letter group.
    items = []
    for letter, group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    s = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return ''.join(s)
Fred Drakeec561091998-03-27 05:25:43 +0000262
def format_html(nodes, columns):
    """Return the index HTML for *nodes* without per-letter sections.

    Delegates directly to format_nodes().
    """
    html = format_nodes(nodes, columns)
    return html
265
Fred Drakeec561091998-03-27 05:25:43 +0000266
def collapse(nodes):
    """Merge runs of adjacent nodes that compare equal via cmp_entry().

    The first node of each run is kept and receives the first link of
    every following node in the run.  The list is modified in place and
    nothing is returned.
    """
    if len(nodes) < 2:
        return
    kept = nodes[:1]
    for node in nodes[1:]:
        last = kept[-1]
        if node.cmp_entry(last):
            # Different entry: it starts a new run.
            kept.append(node)
        else:
            last.links.append(node.links[0])
    nodes[:] = kept
282
283
def dump(nodes, fp):
    """Write the dump() line of every node in *nodes* to *fp*, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
287
288
def main():
    """Command-line entry point.

    Options:
        -c N, --columns=N     number of columns in the output (default 1)
        -l, --letters         split the index into per-letter sections
        -o FILE, --output=FILE  write the HTML to FILE ("-" means stdout)

    The remaining arguments are input files; with none given, or for an
    argument of "-", standard input is read.  A node count is reported on
    stderr when the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" stands for standard input, mirroring "-" for the output.
            nodes = nodes + load(sys.stdin)
        else:
            fp = open(fn)
            try:
                nodes = nodes + load(fp)
            finally:
                fp.close()
    # Count before collapse() merges duplicate entries.
    num_nodes = len(nodes)
    nodes.sort()
    collapse(nodes)
    if letters:
        html = format_html_letters(nodes, columns)
    else:
        html = format_html(nodes, columns)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        # Keep the summary off stdout so it does not end up in the HTML.
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        out = open(ofn, "w")
        try:
            out.write(html)
        finally:
            out.close()
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
Fred Drakeec561091998-03-27 05:25:43 +0000324
325
# Run the index builder only when executed as a script, not when imported.
if __name__ == "__main__":
    main()