blob: 6b1682f8c1a21247d3ebae09a4f9ff9b5383429d [file] [log] [blame]
Fred Drakeec561091998-03-27 05:25:43 +00001#! /usr/bin/env python
2
Fred Drakeec561091998-03-27 05:25:43 +00003__version__ = '$Revision$'
4
Fred Drake03a02351998-12-28 20:46:53 +00005import os
Fred Drakeec561091998-03-27 05:25:43 +00006import re
7import string
8import sys
9
10
class Node:
    """One index entry: its link(s), display text, sort key and sequence id.

    Attributes set by the constructor:
      links -- list of link strings; starts with the single link given
      seqno -- the sequence value exactly as passed in
      text  -- list of display strings, one per '!'-separated level
      key   -- list of sort keys, parallel to ``text``
    """

    # Matches the <#NNN#> markers left in by moving the data out of
    # LaTeX2HTML.  A raw string keeps the \d escape intact for the regex
    # engine instead of relying on Python passing unknown escapes through.
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Build a node from a link, the raw entry string and a sequence id."""
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        str = self.__rmjunk.sub('', str)
        # build up the text
        self.text = split_entry_text(str)
        self.key = split_entry_key(str)

    @staticmethod
    def _three_way(a, b):
        """Return -1, 0 or 1 as *a* orders before, equal to or after *b*.

        Portable stand-in for the cmp() builtin, which Python 3 removed.
        """
        return (a > b) - (a < b)

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return (self.cmp_entry(other)
                or self._three_way(self.seqno, other.seqno))

    def __lt__(self, other):
        """Rich-comparison hook so list.sort() also works on Python 3,
        which ignores __cmp__."""
        return self.__cmp__(other) < 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number."""
        c = 0
        # key and text are parallel lists, so one index walks both.
        for i in range(min(len(self.key), len(other.key))):
            c = (cmp_part(self.key[i], other.key[i])
                 or cmp_part(self.text[i], other.text[i]))
            if c:
                break
        # All shared levels tie: fall back to whole-list comparison, which
        # orders the entry with fewer levels first.
        return (c
                or self._three_way(self.key, other.key)
                or self._three_way(self.text, other.text))

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the one-line record for this node: the links and the
        '!'-joined text separated by chr(1), then '###' and the sequence id,
        ending with a newline."""
        return "%s\1%s###%s\n" \
               % ("\1".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
49
50
def cmp_part(s1, s2):
    """Three-way compare two strings belonging to the same entry level.

    Returns 0 only when the strings are identical.  Otherwise the order is
    case-insensitive, a string that is a case-insensitive proper prefix of
    the other sorts first, and strings differing only in case fall back to
    a case-sensitive comparison.

    Returns -1, 0 or 1.
    """
    def three_way(a, b):
        # Portable replacement for the cmp() builtin, removed in Python 3.
        return (a > b) - (a < b)

    if s1 == s2:
        return 0
    l1 = s1.lower()
    l2 = s2.lower()
    if len(s1) < len(s2) and l1 == l2[:len(s1)]:
        return -1
    if len(s2) < len(s1) and l2 == l1[:len(s2)]:
        return 1
    return three_way(l1, l2) or three_way(s1, s2)
65
66
def split_entry(str, which):
    """Split an index entry into one string per '!'-separated level.

    A level may itself contain '@'-separated fields; for such a level the
    field at index *which* is returned.  A level without '@' is returned
    whole, whatever *which* is.

    Returns a list of strings, one per level.  Raises IndexError if a level
    with '@' has no field at index *which*.
    """
    stuff = []
    for level in str.split('!'):
        fields = level.split('@')
        if len(fields) != 1:
            stuff.append(fields[which])
        else:
            stuff.append(fields[0])
    return stuff
78
79
# Matches text containing a <tt>...</tt> pair (tags matched case-insensitively);
# the three groups are the text before, inside and after the tags.
_rmtt = re.compile(r"(.*)<tt>(.*)</tt>(.*)$", re.IGNORECASE)
# Matches a literal "()" so it can be stripped from sort keys.
_rmparens = re.compile(r"\(\)")
82
def split_entry_key(str):
    """Return the list of sort keys, one per level, for an index entry.

    For each level: a <tt>...</tt> pair is dropped (keeping the enclosed
    text) or, when there is none, the level is lowercased; any '()' is
    removed; finally trim_ignored_letters() is applied.

    Always returns a real list, so callers can take len() of it and index
    it on Python 3 as well, where map() is lazy.
    """
    parts = split_entry(str, 1)
    for i, part in enumerate(parts):
        m = _rmtt.match(part)
        if m:
            part = ''.join(m.group(1, 2, 3))
        else:
            part = part.lower()
        # remove '()' from the key:
        parts[i] = _rmparens.sub('', part)
    return [trim_ignored_letters(part) for part in parts]
94
95
def split_entry_text(str):
    """Return the list of display strings, one per level, for an entry.

    If the entry contains a <tt>...</tt> pair, the tags are removed (the
    enclosed text is kept) before the entry is split with split_entry().
    """
    # Cheap substring test first; the regex is only tried when a tag is
    # possible at all.
    if '<' in str:
        m = _rmtt.match(str)
        if m:
            str = ''.join(m.group(1, 2, 3))
    return split_entry(str, 1)
102
103
def load(fp):
    """Read index records from the open file *fp* and return Node objects.

    A record is a line holding a link, a chr(1) separator, the entry text,
    '###' and a sequence id.  Lines that do not match are skipped.
    """
    record = re.compile("(.*)\1(.*)###(.*)$")
    result = []
    line = fp.readline()
    while line:
        found = record.match(line)
        if found:
            link, text, seqno = found.group(1, 2, 3)
            result.append(Node(link, text, seqno))
        line = fp.readline()
    return result
116
117
def trim_ignored_letters(s):
    """Return *s* lowercased, with one leading '$' removed if present.

    An empty string is returned unchanged instead of raising IndexError.
    """
    # ignore $ to keep environment variables with the
    # leading letter from the name
    s = s.lower()
    if s.startswith("$"):
        return s[1:]
    return s
Fred Drake058068d1998-04-08 23:12:51 +0000126
def get_first_letter(s):
    """Return the lowercase first character of *s* after
    trim_ignored_letters() has been applied.

    Raises IndexError if nothing is left after trimming.
    """
    return trim_ignored_letters(s)[0].lower()
129
130
def split_letters(nodes):
    """Partition *nodes* into runs that share the same first letter.

    Returns a list of (letter, group) tuples in input order, where each
    group is the list of consecutive nodes whose first text level yields
    that letter from get_first_letter().  Empty input gives an empty list.
    """
    letter_groups = []
    current = None
    bucket = None
    for node in nodes:
        initial = get_first_letter(node.text[0])
        # Open a new run for the very first node and at every letter change.
        if bucket is None or initial != current:
            current = initial
            bucket = []
            letter_groups.append((current, bucket))
        bucket.append(node)
    return letter_groups
147
148
Fred Drake7cbf4621998-08-07 19:50:13 +0000149# need a function to separate the nodes into columns...
def split_columns(nodes, columns=1):
    """Distribute *nodes* over *columns* consecutive columns.

    With columns <= 1 the result is ``[nodes]`` (the same list object).
    Otherwise a list of exactly *columns* lists is returned: when the nodes
    do not divide evenly, the leading columns hold one more node than the
    trailing ones, and columns with nothing to hold are empty lists.

    The input list is left untouched; each column is a slice copy.
    """
    if columns <= 1:
        return [nodes]
    # This is a rough height; we may have to increase to avoid breaks before
    # a subitem.
    # divmod keeps the arithmetic integral on Python 3 too, where "/" would
    # yield a float that cannot be used as a slice bound.
    colheight, numlong = divmod(len(nodes), columns)
    if numlong:
        # The remainder is spread over the first `numlong` columns.
        colheight = colheight + 1
    else:
        numlong = columns
    cols = []
    pos = 0
    for _ in range(numlong):
        cols.append(nodes[pos:pos + colheight])
        pos = pos + colheight
    # Any remaining columns are one node shorter (possibly empty).
    colheight = colheight - 1
    while len(cols) < columns:
        cols.append(nodes[pos:pos + colheight])
        pos = pos + colheight
    return cols
Fred Drake7cbf4621998-08-07 19:50:13 +0000178
179
# Indentation string emitted once per nesting level in the generated
# <dl> markup (see format_column).
DL_LEVEL_INDENT = " "
181
def format_column(nodes):
    """Render *nodes* as nested ``<dl compact>`` lists and return the HTML.

    Each node's ``text`` list is one path of index levels.  Levels shared
    with the previous node are not repeated; new intermediate levels open a
    nested list, and the last level is written as a <dt> using the node's
    first link, followed by a "[Link]" anchor for each additional link.
    """
    strings = ["<dl compact>"]
    append = strings.append
    level = 0
    previous = []
    for node in nodes:
        current = node.text
        # Number of leading levels this entry shares with the previous one.
        count = 0
        for i in range(min(len(current), len(previous))):
            if previous[i] != current[i]:
                break
            count = i + 1
        if count > level:
            append("<dl compact>" * (count - level) + "\n")
            level = count
        elif level > count:
            # Close the lists of levels that are no longer shared.
            append("\n")
            append(level * DL_LEVEL_INDENT)
            append("</dl>" * (level - count))
            level = count
        # else: level == count
        # Open one nested list per new intermediate level.
        for i in range(count, len(current) - 1):
            term = node.text[i]
            level = level + 1
            append("\n<dt>%s\n<dd>\n%s<dl compact>"
                   % (term, level * DL_LEVEL_INDENT))
        append("\n%s<dt>%s%s</a>"
               % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
        for link in node.links[1:]:
            append(",\n%s %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
        previous = current
    append("\n")
    # Close every list still open, plus the outermost one.
    append("</dl>" * (level + 1))
    return ''.join(strings)
216
217
def format_nodes(nodes, columns=1):
    """Return the HTML for *nodes*, laid out in *columns* columns.

    With more than one column the nodes are divided by split_columns() and
    each part is rendered by format_column() inside a cell of a
    single-row table; otherwise a single format_column() listing is
    produced.  The result always ends with a paragraph break.
    """
    strings = []
    append = strings.append
    if columns > 1:
        # Floor division keeps the percentage integral on Python 3 as well.
        colwidth = 100 // columns
        append('<table width="100%"><tr valign="top">')
        for col in split_columns(nodes, columns):
            append('<td width="%d%%">\n' % colwidth)
            append(format_column(col))
            append("\n</td>")
        append("\n</tr></table>")
    else:
        append(format_column(nodes))
    append("\n<p>\n")
    return ''.join(strings)
Fred Drakeec561091998-03-27 05:25:43 +0000237
238
def format_letter(letter):
    """Return the HTML heading, with a ``letter-X`` anchor, that introduces
    the section for *letter*.

    '.' and '_' get a spelled-out label; any other letter is shown
    uppercased.  The anchor name always uses *letter* unchanged.
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
248
249
def format_html_letters(nodes, columns=1):
    """Return the HTML index for *nodes*, grouped under per-letter headings.

    The output starts with a centered navigation bar linking to each
    letter's anchor, followed by, for every letter group, the heading from
    format_letter() and the entries from format_nodes().
    """
    letter_groups = split_letters(nodes)
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
             for letter, group in letter_groups]
    s = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    # `group` rather than `nodes`, so the parameter is not shadowed.
    for letter, group in letter_groups:
        s.append(format_letter(letter))
        s.append(format_nodes(group, columns))
    return ''.join(s)
Fred Drakeec561091998-03-27 05:25:43 +0000261
def format_html(nodes, columns):
    """Return the HTML index for *nodes* as one listing without letter
    headings; delegates to format_nodes()."""
    html = format_nodes(nodes, columns)
    return html
264
Fred Drakeec561091998-03-27 05:25:43 +0000265
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.

    Consecutive nodes for which cmp_entry() reports equality are merged
    into the first of the run: the first link of each later node is
    appended to the survivor's ``links`` and the later node is dropped.

    Destructive: *nodes* is modified in place and nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the survivors in one pass rather than deleting from the middle
    # of the list, which would shift the tail on every merge.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            prev.links.append(node.links[0])
    # Slice assignment keeps the caller's list object, as before.
    nodes[:] = kept
281
282
def dump(nodes, fp):
    """Write the dump() record of every node in *nodes* to *fp*, in order."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
286
287
def main():
    """Command-line entry point: read index data and write the HTML index.

    Options:
      -o FILE, --output=FILE   write the HTML to FILE; "-" (the default)
                               means standard output
      -c N, --columns=N        number of columns (default 1)
      -l, --letters            group the entries under per-letter headings

    Remaining arguments are input files.  With none given, or for an
    argument of "-", standard input is read, mirroring how "-" is treated
    for the output file.  A one-line node count is reported on stderr when
    the HTML goes to stdout, and on stdout otherwise.
    """
    import getopt
    ifn = "-"
    ofn = "-"
    columns = 1
    letters = 0
    opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
                               ["columns=", "letters", "output="])
    for opt, val in opts:
        if opt in ("-o", "--output"):
            ofn = val
        elif opt in ("-c", "--columns"):
            columns = int(val)
        elif opt in ("-l", "--letters"):
            letters = 1
    if not args:
        args = [ifn]
    nodes = []
    for fn in args:
        if fn == "-":
            nodes = nodes + load(sys.stdin)
        else:
            # Close each input as soon as it has been read.
            with open(fn) as fp:
                nodes = nodes + load(fp)
    # Count before collapse() merges duplicate entries.
    num_nodes = len(nodes)
    nodes.sort()
    collapse(nodes)
    if letters:
        html = format_html_letters(nodes, columns)
    else:
        html = format_html(nodes, columns)
    program = os.path.basename(sys.argv[0])
    if ofn == "-":
        sys.stdout.write(html)
        sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
    else:
        with open(ofn, "w") as fp:
            fp.write(html)
        # Same output as a bare print followed by a print of the message.
        sys.stdout.write("\n%s: %d index nodes\n" % (program, num_nodes))
Fred Drakeec561091998-03-27 05:25:43 +0000323
324
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()