#! /usr/bin/env python

"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""

# This should have an explicit option to indicate whether the *INPUT* was
# generated from an SGML or an XML application.
11
Fred Drake30a68c71998-11-23 16:59:39 +000012import errno
Fred Drakef077b9d1998-12-01 19:01:53 +000013import esistools
Fred Drake36dfe581999-01-19 23:03:04 +000014import os
Fred Drake30a68c71998-11-23 16:59:39 +000015import re
16import string
17
Fred Drakea4699a72001-03-23 16:38:12 +000018from xml.sax.saxutils import escape
Fred Drake79ad1f11999-01-14 17:06:09 +000019
Fred Drake30a68c71998-11-23 16:59:39 +000020
# Default for main()'s autoclose setting: element names whose end tags
# convert() leaves out of SGML output.
AUTOCLOSE = ()

# When LIST_EMPTIES is true, convert() stores the names of the empty
# elements it has seen in EMPTIES_FILENAME.
LIST_EMPTIES = 0
EMPTIES_FILENAME = "../sgml/empties.dat"


# Name maps, keyed by the case-normalized name; the value is the spelling
# to use on output.  One map each for elements, attributes and tokens.
_elem_map, _attr_map, _token_map = {}, {}, {}

# Applied by map_gi() to a name before it is looked up in a map.
_normalize_case = str
32
def map_gi(sgmlgi, map):
    """Return the output spelling for the name *sgmlgi*.

    The name is case-normalized with ``_normalize_case`` and looked up in
    the dictionary *map*.  A name that is not in *map* yet is recorded
    there under its normalized form and returned unchanged.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A missing dictionary key raises KeyError (not IndexError).
        map[uncased] = sgmlgi
        return sgmlgi
40
def null_map_gi(sgmlgi, map):
    """Identity stand-in for map_gi(): return *sgmlgi*, ignoring *map*."""
    return sgmlgi
43
44
def format_attrs(attrs, xml=0):
    """Return the attributes in *attrs* formatted for use in a start tag.

    attrs -- dictionary mapping attribute names to string values
    xml   -- true to write every attribute as name="value"; false to use
             the shorter SGML forms where the value allows it

    The result is the empty string when *attrs* is empty.  Otherwise the
    attributes appear in sorted name order, each preceded by one space.
    """
    parts = []
    append = parts.append
    for name, value in sorted(attrs.items()):
        if not xml:
            # this is a little bogus, but should do for now
            if name == value and isnmtoken(value):
                append(value)
                continue
            if istoken(value):
                if value == "no" + name:
                    append(value)
                else:
                    append("%s=%s" % (name, value))
                continue
        # escape() only handles &, < and > by default; the double quote
        # has to be replaced as well because it delimits the value.
        append('%s="%s"' % (name, escape(value, {'"': "&quot;"})))
    if parts:
        # An empty first item makes join() emit the leading space.
        parts.insert(0, '')
    return " ".join(parts)
Fred Drake30a68c71998-11-23 16:59:39 +000067
68
# \Z anchors at the very end of the string; "$" would also accept a value
# that ends with a newline.
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)
def isnmtoken(s):
    """Return true if *s* is a letter followed by letters, digits, '-', '.' or '_'."""
    return _nmtoken_rx.match(s) is not None
72
# \Z anchors at the very end of the string; "$" would also accept a value
# that ends with a newline.
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)
def istoken(s):
    """Return true if *s* is a letter or digit followed by letters, digits, '-', '.' or '_'."""
    return _token_rx.match(s) is not None
76
77
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Translate the ESIS events read from *ifp* into markup written to *ofp*.

    ifp       -- file-like object; each line read from it is one event,
                 identified by its first character
    ofp       -- file-like object that receives the generated markup
    xml       -- true to generate XML, false to generate SGML
    autoclose -- element names whose end tags are left out of SGML output
                 (ignored when *xml* is true)
    verbatims -- element names; character data following the start tag of
                 one of these keeps "---" as it is, until the next end tag

    Raises RuntimeError for an event type that is not handled here.  When
    LIST_EMPTIES is true the names of the empty elements seen are passed to
    dump_empty_element_names() at the end.
    """
    if xml:
        autoclose = ()
    attrs = {}              # attributes collected for the next start tag
    lastopened = None       # element most recently opened, for "</>"
    knownempties = []       # names of all elements seen flagged as empty
    knownempty = 0          # an "e" event announced an empty element
    lastempty = 0           # the element just opened was an empty one
    inverbatim = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        event = line[0]
        data = line[1:]
        if data and data[-1] == "\n":
            data = data[:-1]
        if event == "-":
            # character data
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                # An entity reference keeps the output plain ASCII, so it
                # does not depend on the encoding of the output file.
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            # start tag
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            # end tag
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # an empty element was already closed by its "<.../>" tag
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    # nothing but inline data since the start tag: use the
                    # short end tag
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            # attribute for the next start tag: "name type value"; the
            # declared type is not needed
            name, _atype, value = data.split(" ", 2)
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif event == "e":
            knownempty = 1
        elif event == "&":
            # entity reference
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)
155
156
def dump_empty_element_names(knownempties, filename=None):
    """Merge *knownempties* into the file of empty element names.

    knownempties -- iterable of element names; it is not modified
    filename     -- file to update; EMPTIES_FILENAME when omitted

    Names already listed in the file (one per line, surrounding whitespace
    ignored) are kept.  The file is then rewritten with all names sorted,
    one per line.
    """
    if filename is None:
        filename = EMPTIES_FILENAME
    names = set(knownempties)
    if os.path.isfile(filename):
        with open(filename) as fp:
            for line in fp:
                gi = line.strip()
                if gi:
                    names.add(gi)
    with open(filename, "w") as fp:
        fp.write("\n".join(sorted(names)))
        fp.write("\n")
Fred Drake36dfe581999-01-19 23:03:04 +0000177
Fred Drake30a68c71998-11-23 16:59:39 +0000178
def update_gi_map(map, names, fromsgml=1):
    """Record the output spelling of every name in *names* in *map*.

    map      -- dictionary, updated in place
    names    -- comma-separated string of names, or a sequence of names
    fromsgml -- when true the key is the lower-cased name; otherwise the
                name is used as its own key

    Empty names are ignored.
    """
    if hasattr(names, "split"):
        # a single string: break it up into the individual names
        names = names.split(",")
    for name in names:
        if not name:
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
Fred Drake30a68c71998-11-23 16:59:39 +0000186
187
def usage():
    """Write a summary of the command line to standard error."""
    import sys
    sys.stderr.write(
        "usage: %s [options] [infile [outfile]]\n"
        "  -a NAMES, --autoclose=NAMES  comma-separated element names\n"
        "                               whose end tags are omitted (SGML)\n"
        "  -d, --declare                start XML output with an XML\n"
        "                               declaration\n"
        "  -e                           store the names of empty elements\n"
        "  -s, --sgml                   generate SGML\n"
        "  -x, --xml                    generate XML (default)\n"
        "  --elements-map=NAMES         comma-separated element names in\n"
        "                               the spelling to use on output\n"
        "  --attributes-map=NAMES       same, for attribute names\n"
        "  --values-map=NAMES           same, for token values\n"
        % os.path.basename(sys.argv[0]))


def main():
    """Run the converter as a command-line program.

    With no file arguments the ESIS stream is read from standard input and
    the markup is written to standard output; one argument names the input
    file and a second one the output file.  More than two arguments, or an
    invalid option, print the usage summary and exit with status 2.
    """
    import getopt
    import sys
    global LIST_EMPTIES, _normalize_case, map_gi
    #
    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    # the *-map options may be repeated; collect every argument
    elem_names = []
    attr_names = []
    value_names = []
    verbatims = ('verbatim', 'interactive-session')
    try:
        opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                                   ["autoclose=", "declare", "sgml", "xml",
                                    "elements-map=", "attributes-map=",
                                    "values-map="])
    except getopt.GetoptError as e:
        sys.stderr.write("%s\n" % e)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names.append(arg)
        elif opt == "--attributes-map":
            attr_names.append(arg)
        elif opt == "--values-map":
            value_names.append(arg)
    elem_names = ",".join(elem_names)
    attr_names = ",".join(attr_names)
    value_names = ",".join(value_names)
    #
    # open input streams:
    #
    if len(args) > 2:
        usage()
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if args:
        ifp = open(args[0])
    if len(args) == 2:
        ofp = open(args[1], "w")
    try:
        #
        # setup the name maps:
        #
        if elem_names or attr_names or value_names:
            # assume the origin was SGML; ignore case of the names from the
            # ESIS stream but set up conversion tables to get the case right
            # on output
            _normalize_case = str.lower
            update_gi_map(_elem_map, elem_names)
            update_gi_map(_attr_map, attr_names)
            update_gi_map(_token_map, value_names)
        else:
            # no maps given: pass all names through untouched
            map_gi = null_map_gi
        #
        # run the conversion:
        #
        try:
            if xml and xmldecl:
                ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
            convert(ifp, ofp, xml=xml, autoclose=autoclose,
                    verbatims=verbatims)
        except IOError as e:
            # a reader that closes the pipe early is not an error
            if e.errno != errno.EPIPE:
                raise
    finally:
        # close only what was opened here, never the standard streams
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()


if __name__ == "__main__":
    main()