#! /usr/bin/env python
2
"""Build an HTML index from a file of index entry records.

The script reads the file named by its first command-line argument.  Each
usable line has the form ``link NUL text ### seqno``; the text may use
``!`` to separate entry levels and ``@`` to separate a sort key from the
displayed text.  The entries are sorted, written to ``<file>.dump-1``,
collapsed so that equal entries share one node, written to
``<file>.dump-2``, and finally rendered as HTML on standard output.
"""
__version__ = '$Revision$'
6
7import re
8import string
9import sys
10
11
def _cmp(a, b):
    """Three-way comparison returning -1, 0 or 1 (the old cmp() builtin)."""
    return (a > b) - (a < b)


def _seqno_key(seqno):
    """Return a sort key for a sequence number.

    Sequence numbers read from a file are strings; comparing them as text
    would put "10" before "9".  Values that look like integers are
    therefore ordered numerically, ahead of anything that does not.
    """
    if not isinstance(seqno, (int, float)):
        try:
            seqno = int(seqno)
        except (TypeError, ValueError):
            return (1, 0, "%s" % (seqno,))
    return (0, seqno, "")


class Node:
    """A single index entry.

    Attributes:
      links -- list of link strings that point at this entry; starts with
               the one passed to the constructor.
      seqno -- the sequence number exactly as passed in; used only to break
               ties when sorting.
      text  -- list with the display text of each level of the entry.
      key   -- list with the sort key of each level: lowercased, with
               <tt>...</tt> markup removed.

    Instances define the rich comparison methods (and still provide
    __cmp__), so list.sort() works.  They are not hashable.
    """

    # One balanced <tt>...</tt> pair; group 1 is the enclosed text.
    __rmtt = re.compile(r"<tt>(.*?)</tt>", re.IGNORECASE)
    __rmjunk = re.compile(r"<#\d+#>")

    def __init__(self, link, str, seqno):
        """Create the node for the index entry string 'str'.

        'str' has the form "level1!level2!..." where each level is either
        "text" or "key@text".  (The parameter name shadows the builtin; it
        is kept so existing callers continue to work.)
        """
        self.links = [link]
        self.seqno = seqno
        # remove <#\d+#> left in by moving the data out of LaTeX2HTML
        entry = self.__rmjunk.sub('', str)
        # The key source drops every <tt>...</tt> pair (contents remain)
        # and is lowercased; the display text keeps the markup.
        keysrc = self.__rmtt.sub(r"\1", entry).lower()
        # Split each level at the first '@' only, so a display text that
        # itself contains '@' does not break the entry.
        self.text = [part.split('@', 1)[-1] for part in entry.split('!')]
        # The key is always derived from the part before '@'; reusing the
        # text list would wrongly sort "key@text" entries by their text.
        self.key = [part.split('@', 1)[0] for part in keysrc.split('!')]

    def __cmp__(self, other):
        """Comparison operator includes sequence number, for use with
        list.sort()."""
        return (self.cmp_entry(other)
                or _cmp(_seqno_key(self.seqno), _seqno_key(other.seqno)))

    # Rich comparisons, all defined in terms of __cmp__(); these are what
    # sorting uses on interpreters that no longer call __cmp__().

    def __eq__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not isinstance(other, Node):
            return NotImplemented
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def cmp_entry(self, other):
        """Comparison 'operator' that ignores sequence number.

        Levels are compared pairwise, by key first and display text second.
        If all shared levels are equal, the entry with fewer levels sorts
        first.  Returns -1, 0 or 1.
        """
        levels = zip(self.key, other.key, self.text, other.text)
        for skey, okey, stext, otext in levels:
            c = _cmp(skey, okey) or _cmp(stext, otext)
            if c:
                return c
        return _cmp(self.key, other.key)

    def __repr__(self):
        return "<Node for %s (%s)>" % ('!'.join(self.text), self.seqno)

    def __str__(self):
        return '!'.join(self.key)

    def dump(self):
        """Return the node as one text line: the links joined by NUL, a
        NUL, the display text levels joined by '!', '###' and the sequence
        number."""
        return "%s\0%s###%s\n" \
               % ("\0".join(self.links),
                  '!'.join(self.text),
                  self.seqno)
83
84
def load(fp):
    """Read index records from the open file 'fp' and return them as a
    list of Node objects, in file order.

    A record line looks like "link NUL text ### seqno".  Lines that do not
    match this pattern are ignored.  The sequence number is handed to Node
    as the string found in the file.
    """
    record = re.compile(r"(.*)\0(.*)###(.*)$")
    result = []
    line = fp.readline()
    while line:
        match = record.match(line)
        if match is not None:
            link, text, seqno = match.groups()
            result.append(Node(link, text, seqno))
        line = fp.readline()
    return result
97
98
def split_letters(nodes):
    """Partition 'nodes' into runs that share the same first key character.

    Returns a list of (letter, group) tuples in input order, where 'group'
    is the list of consecutive nodes whose first key starts with 'letter'.
    The input is not reordered, so a letter shows up once per run.
    """
    letter_groups = []
    current = None
    for node in nodes:
        initial = node.key[0][0]
        # Open a new group for the first node and on every letter change.
        if not letter_groups or initial != current:
            current = initial
            letter_groups.append((current, []))
        letter_groups[-1][1].append(node)
    return letter_groups
115
116
def format_nodes(nodes):
    """Return the HTML definition-list markup for a sequence of nodes.

    Each node must provide 'key' and 'text' (lists with one item per entry
    level) and 'links' (a non-empty list of strings).  The nesting depth of
    a node is len(node.key) - 1.  Every link string is written directly in
    front of the entry text and followed by "</a>", so it is presumably an
    opening anchor tag -- confirm against the producer of the data.

    An entry with several links is written once; the additional links are
    appended after a comma as "[Link]" anchors.

    The result ends with two newlines.
    """
    level = 0
    strings = ["<dl compact>"]
    append = strings.append
    prev = None
    for node in nodes:
        nlevel = len(node.key) - 1
        if nlevel > level:
            # Going deeper: print the parent heading unless the previous
            # node already belongs to the same parent, then open a list.
            if prev is None or node.key[level] != prev.key[level]:
                append("%s\n<dl compact>" % node.text[level])
            else:
                append("<dl compact>")
            level = nlevel
        elif nlevel < level:
            # Coming back up: close one list per level left behind.
            append("</dl>" * (level - len(node.key) + 1))
            level = nlevel
            # NOTE(review): the tag emitted here depends on whether the key
            # at the new level changed; kept as is -- verify the resulting
            # <dl> nesting against rendered output before altering it.
            if prev is not None and node.key[level] != prev.key[level]:
                append("</dl>")
            else:
                append("<dl compact>")
        elif level:
            # Same depth inside a sub-list: start a new sub-list when the
            # parent changed.  'prev' cannot be None here because 'level'
            # only becomes non-zero after a node has been processed.
            if node.key[level-1] != prev.key[level-1]:
                append("</dl>\n%s<dl compact>"
                       % node.text[level-1])
        append("%s%s</a><br>" % (node.links[0], node.text[-1]))
        for link in node.links[1:]:
            # Replace the trailing "<br>" (4 characters) of the previous
            # line with a comma and continue with another anchor.
            strings[-1] = strings[-1][:-4] + ","
            append(link + "[Link]</a><br>")
        prev = node
    append("</dl>" * (level + 1))
    append("")
    append("")
    return "\n".join(strings)
152
153
def format_letter(letter):
    """Return the HTML heading that introduces the section for 'letter'.

    The heading carries the anchor name "letter-<letter>".  The visible
    label is the upper-cased letter, except that '.' and '_' are spelled
    out as ". (dot)" and "_ (underscore)".
    """
    if letter == '.':
        lettername = ". (dot)"
    elif letter == '_':
        lettername = "_ (underscore)"
    else:
        lettername = letter.upper()
    return "<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
           % (letter, lettername)
163
164
def format_html(nodes):
    """Return the complete HTML index for 'nodes' as one string.

    The output starts with a centered navigation bar that links to every
    letter group returned by split_letters(), followed, for each group, by
    its heading (format_letter) and its entries (format_nodes).
    """
    letter_groups = split_letters(nodes)
    items = ["<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
             for letter, _group in letter_groups]
    # Collect the pieces and join once instead of growing a string.
    pieces = ["<hr><center>\n%s</center>\n" % " |\n".join(items)]
    for letter, group in letter_groups:
        pieces.append(format_letter(letter))
        pieces.append(format_nodes(group))
    return "".join(pieces)
175
176
def collapse(nodes):
    """Collapse sequences of nodes with matching keys into a single node.
    Destructive.

    Adjacent nodes for which cmp_entry() reports equality are merged: the
    first node of each run stays in the list and receives the links of the
    nodes that follow it, which are removed.  The list is modified in
    place and nothing is returned.
    """
    if len(nodes) < 2:
        return
    # Build the surviving list in one pass rather than deleting from the
    # middle of 'nodes' repeatedly, which is quadratic.
    kept = [nodes[0]]
    for node in nodes[1:]:
        prev = kept[-1]
        if node.cmp_entry(prev):
            kept.append(node)
        else:
            # Carry over every link of the merged node, not just the first.
            prev.links.extend(node.links)
    nodes[:] = kept
193
194
def dump(nodes, fp):
    """Write the dump() line of every node in 'nodes' to the open file
    'fp', in order, one write() call per node."""
    write = fp.write
    for entry in nodes:
        write(entry.dump())
198
199
def main():
    """Build the index for the file named by the first command-line
    argument.

    The sorted entries are written to "<file>.dump-1", the collapsed
    entries to "<file>.dump-2", and the HTML index to standard output.
    Every file is closed as soon as it has been read or written.
    """
    fn = sys.argv[1]
    with open(fn) as fp:
        nodes = load(fp)
    nodes.sort()
    with open(fn + ".dump-1", "w") as fp:
        dump(nodes, fp)
    collapse(nodes)
    with open(fn + ".dump-2", "w") as fp:
        dump(nodes, fp)
    sys.stdout.write(format_html(nodes))
208
209
# Run the index builder only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()