blob: 01805f34b338538019330f466fad46cb811ceb23 [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
3"""
4"""
5__version__ = '$Revision$'
6
7import re
8import string
9import sys
10
11
def _three_way(a, b):
    """Return -1, 0 or 1 as *a* sorts before, equal to, or after *b*.

    Gives the same answer as the Python 2 ``cmp()`` builtin for the strings
    and lists of strings compared in this module, and also runs on Python 3,
    where ``cmp()`` no longer exists.
    """
    return (a > b) - (a < b)


class Node:
    """One index entry.

    Attributes:
      links -- list of link strings; starts with the single link given to
               the constructor (collapse() appends more).
      seqno -- the sequence number exactly as passed in; only used to
               break ties when sorting.
      text  -- list of display strings, as returned by split_entry_text().
      key   -- list of sort keys, as returned by split_entry_key().
    """

    # Raw string so that "\d" reaches the regex engine unchanged.
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Build an entry from a link, the raw entry string and a seqno."""
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # <tt>...</tt> markup is stripped (contents kept) by the two split
        # helpers while they build the text and key lists.
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return self.cmp_entry(other) or _three_way(self.seqno, other.seqno)

    def __lt__(self, other):
        """Rich-comparison form of __cmp__().

        list.sort() only consults __lt__ on Python 3, which ignores
        __cmp__ entirely.
        """
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # Compare level by level over the levels both entries have: the
        # key decides first, the display text breaks ties.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # Fall back to comparing the complete lists.
        return (c
                or _three_way(self.key, other.key)
                or _three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the entry as one line of text.

        Layout: the links joined by chr(1), another chr(1), the text parts
        joined by '!', the marker '###', the sequence number and a newline.
        """
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
51
52
def cmp_part(s1, s2):
    """Three-way compare two strings for index ordering.

    Returns 0 when the strings are identical.  Otherwise, if one string is
    shorter and matches the start of the other when case is ignored, the
    shorter one sorts first.  Otherwise the strings are ordered ignoring
    case, with the exact (case-sensitive) order breaking ties.

    The result is -1, 0 or 1.
    """
    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    # (a > b) - (a < b) is the portable spelling of the old cmp(a, b).
    if l1 != l2:
        return (l1 > l2) - (l1 < l2)
    return (s1 > s2) - (s1 < s2)
67
68
def split_entry(str, which):
    """Split an entry string into one string per '!'-separated level.

    Each level is further split on '@'.  A level that contains no '@' is
    used whole; otherwise the field at index `which` is selected.

    Returns the list of selected strings, one per level.
    """
    stuff = []
    for part in str.split('!'):
        entry = part.split('@')
        if len(entry) != 1:
            stuff.append(entry[which])
        else:
            stuff.append(entry[0])
    return stuff
80
81
# Matches text containing a <tt>...</tt> pair, in any letter case; groups
# 1 to 3 capture the text before, inside and after the pair.
_rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.I)
# Matches a literal "()" pair.
_rmparens = re.compile(r"\(\)")
84
def split_entry_key(str):
    """Return the list of sort keys for an entry string, one per level.

    Each level selected by split_entry(str, 1) has one <tt>...</tt> pair
    removed (contents kept) or, when there is none, is lowercased; then
    every "()" is removed and the result is passed through
    trim_ignored_letters().

    Always returns a real list, so callers can take len() of it and index
    it on Python 3 as well, where map() yields an iterator.
    """
    keys = []
    for part in split_entry(str, 1):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        part = _rmparens.sub('', part)
        keys.append(trim_ignored_letters(part))
    return keys
96
97
def split_entry_text(str):
    """Return the list of display strings for an entry string.

    If the string contains a <tt>...</tt> pair, the tags are dropped and
    their contents kept before the string is split with
    split_entry(str, 1).
    """
    # Cheap pre-check: only run the regex when a tag could be present.
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)
104
105
def load(fp):
    """Read index entries from the file object `fp`.

    Each line of the form LINK chr(1) TEXT '###' SEQNO becomes a
    Node(LINK, TEXT, SEQNO); lines that do not match are skipped.
    Returns the list of nodes in file order.
    """
    # "\1" is deliberately NOT in a raw string: it is the chr(1) separator
    # character, not a regex backreference.
    entry_rx = re.compile("(.*)\1(.*)###(.*)$")
    result = []
    line = fp.readline()
    while line:
        match = entry_rx.match(line)
        if match:
            link, text, seqno = match.group(1, 2, 3)
            result.append(Node(link, text, seqno))
        line = fp.readline()
    return result
118
119
# Skip a leading "$" so that environment variables are keyed by the first
# letter of their name.
SKIP_LETTERS = "$"


def trim_ignored_letters(s):
    """Return `s` lowercased with leading SKIP_LETTERS characters removed.

    Returns an empty string when `s` is empty or consists only of skipped
    characters.
    """
    s = s.lower()
    # Check that something is left before indexing: s[0] on an empty
    # string raises IndexError.
    while s and s[0] in SKIP_LETTERS:
        s = s[1:]
    return s
128
def get_first_letter(s):
    """Return the lowercased first character of trim_ignored_letters(s).

    If trim_ignored_letters() returns an empty string the result is ''
    (the slice avoids the IndexError that indexing [0] would raise).
    """
    return trim_ignored_letters(s)[:1].lower()
131
132
def split_letters(nodes):
    """Group consecutive nodes by the first letter of their top-level text.

    Returns a list of (letter, nodes) tuples in input order; a new group
    starts whenever get_first_letter(node.text[0]) differs from the letter
    of the group before it.  An empty input gives an empty list.
    """
    groups = []
    current_letter = None
    for entry in nodes:
        first = get_first_letter(entry.text[0])
        if not groups or first != current_letter:
            current_letter = first
            groups.append((first, []))
        groups[-1][1].append(entry)
    return groups
149
150
# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Split `nodes` into `columns` consecutive slices of near-equal length.

    With columns <= 1 the result is [nodes] (the same list object, not a
    copy).  Otherwise exactly `columns` new lists are returned: the first
    len(nodes) % columns of them hold one item more than the rest, and
    trailing columns are empty when there are fewer nodes than columns.

    The input list is not modified.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    colheight, numlong = divmod(len(nodes), columns)
    cols = []
    start = 0
    for i in range(columns):
        end = start + colheight
        if i < numlong:
            # The leading columns absorb the remainder, one item each.
            end = end + 1
        cols.append(nodes[start:end])
        start = end
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000180
181
# Text emitted once per nesting level in front of nested list markup.
# NOTE(review): value reproduced exactly as found; confirm the intended
# indent width.
DL_LEVEL_INDENT = " "


def format_column(nodes):
    """Render `nodes` as nested <dl compact> lists and return the HTML.

    Each node must provide `text` (list of strings, one per index level)
    and `links` (non-empty list of strings).  links[0] is written directly
    before the last text element and followed by "</a>", so the links are
    presumably opening <a ...> tags; every further link is written as a
    "[Link]" reference.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # count = number of leading levels shared with the previous entry
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # close the sub-lists the new entry does not belong to
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # open one nested list per intermediate level not yet open
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            append("\n<dt>%s\n<dd>\n%s<dl compact>"
                   % (term, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # close whatever is still open plus the outermost list
    append("</dl>" * (level + 1))
    return ''.join(strings)
218
219
def format_nodes(nodes, columns=1):
    """Return the HTML for `nodes`, laid out in `columns` columns.

    With more than one column, the nodes are divided by split_columns()
    and each part is rendered by format_column() inside its own cell of a
    one-row table.  Otherwise a single format_column() rendering is used.
    Either way the output ends with a paragraph break.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Floor division keeps the width an integer on Python 3 as well.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000239
240
def format_letter(letter):
    """Return the HTML section heading for an index letter.

    The heading carries the anchor "letter-<letter>".  '.' and '_' are
    shown with a spelled-out name; any other letter is shown uppercased.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
250
251
def format_html_letters(nodes, columns=1):
    """Return the index HTML divided into per-letter sections.

    The output starts with a centered bar of links to the letter anchors,
    followed, for every group returned by split_letters(), by the
    format_letter() heading and the format_nodes() rendering of the group.
    """
    letter_groups = split_letters(nodes)
    items = []
    for letter, _group in letter_groups:
        items.append("<b><a href=\"#letter-%s\">%s</a></b>"
                     % (letter, letter))
    parts = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        parts.append(format_letter(letter))
        parts.append(format_nodes(group, columns))
    return ''.join(parts)
Fred Drakeec561091998-03-27 05:25:43 +0000263
def format_html(nodes, columns=1):
    """Return the index HTML without per-letter sections.

    Delegates to format_nodes(); `columns` defaults to 1, matching
    format_nodes() and format_html_letters().
    """
    return format_nodes(nodes, columns)
266
Fred Drakeec561091998-03-27 05:25:43 +0000267
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    A node for which cmp_entry() against the most recently kept node
    returns 0 is dropped, and its first link is appended to the kept
    node's links.

    Destructive: `nodes` is updated in place and nothing is returned.
    """
    if len(nodes) < 2:
        return
    prev = nodes[0]
    kept = [prev]
    for node in nodes[1:]:
        if not node.cmp_entry(prev):
            prev.links.append(node.links[0])
        else:
            kept.append(node)
            prev = node
    # One slice assignment instead of repeated "del nodes[i]": the
    # caller's list is still modified in place, in a single pass.
    nodes[:] = kept
283
284
def dump(nodes, fp):
    """Write the dump() text of every node, in order, to `fp`."""
    for entry in nodes:
        record = entry.dump()
        fp.write(record)
288
289
def main():
    """Command-line driver: read index data, write the HTML index.

    Options:
      -c N, --columns=N     number of columns (default 1)
      -l, --letters         divide the index into per-letter sections
      -o FILE, --output=FILE  output file; "-" (the default) is stdout

    The remaining arguments are input files; with none given, or for an
    argument of "-", standard input is read.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            # "-" means standard input, mirroring "-" for the output;
            # passing it to open() would look for a file named "-".
            nodes.extend(load(sys.stdin))
        else:
            fp = open(fn)
            try:
                nodes.extend(load(fp))
            finally:
                fp.close()
    nodes.sort()
    collapse(nodes)
    if letters:
        html = format_html_letters(nodes, columns)
    else:
        html = format_html(nodes, columns)
    if ofn == "-":
        sys.stdout.write(html)
    else:
        fp = open(ofn, "w")
        try:
            fp.write(html)
        finally:
            # Close explicitly so the output is flushed to disk.
            fp.close()
Fred Drakeec561091998-03-27 05:25:43 +0000320
321
# Run the command-line driver only when this file is executed as a script.
if __name__ == "__main__":
    main()