blob: adb887312a0790e8af3e637f1f8d3d3095551159 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Convert ESIS events to SGML or XML markup.
4
5This is limited, but seems sufficient for the ESIS generated by the
6latex2esis.py script when run over the Python documentation.
7"""
Fred Drake607aed71999-02-18 16:30:16 +00008
9# This should have an explicit option to indicate whether the *INPUT* was
10# generated from an SGML or an XML application.
11
Fred Drake30a68c71998-11-23 16:59:39 +000012__version__ = '$Revision$'
13
14import errno
Fred Drakef077b9d1998-12-01 19:01:53 +000015import esistools
Fred Drake36dfe581999-01-19 23:03:04 +000016import os
Fred Drake30a68c71998-11-23 16:59:39 +000017import re
18import string
19
Fred Drake79ad1f11999-01-14 17:06:09 +000020from xml.utils import escape
21
Fred Drake30a68c71998-11-23 16:59:39 +000022
# Default set of element names whose end tags are left out of SGML output;
# main() starts from this value and --autoclose replaces it.
AUTOCLOSE = tuple()

# File in which dump_empty_element_names() keeps the sorted list of element
# names that were seen to be empty.
EMPTIES_FILENAME = "../sgml/empties.dat"
# When true, convert() passes the empty elements it saw to
# dump_empty_element_names() once the input is exhausted.
LIST_EMPTIES = 0


# Name tables consulted through map_gi(): each maps a case-normalized name
# to the spelling that should appear in the output.
_elem_map = dict()
_attr_map = dict()
_token_map = dict()
32
# Hook applied to a name before it is looked up in a map.  The default
# (str) leaves the name alone; main() rebinds it to a lower-casing function
# when name maps are given on the command line.
_normalize_case = str

def map_gi(sgmlgi, map):
    """Return the output spelling for the name `sgmlgi`.

    The name is normalized with _normalize_case() and looked up in `map`.
    If it is not there yet, the spelling passed in is recorded under the
    normalized key and returned, so later occurrences are spelled the same
    way as the first one.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A missing dictionary key raises KeyError (IndexError is only
        # raised by sequences), so this is the "first time seen" path.
        map[uncased] = sgmlgi
        return sgmlgi
42
def null_map_gi(sgmlgi, map):
    """Return `sgmlgi` unchanged.

    Pass-through with the same signature as map_gi(); `map` is accepted
    but neither read nor modified.  main() installs this in place of
    map_gi() when no name maps were requested.
    """
    return sgmlgi
45
46
def format_attrs(attrs, xml=0):
    """Format the attribute dictionary `attrs` for use inside a start tag.

    attrs -- dictionary mapping attribute names to string values
    xml   -- when true every attribute is written as name="value";
             otherwise the shorter SGML forms are used where possible

    Returns "" when there are no attributes; otherwise a string in which
    each attribute, in sorted name order, is preceded by one space.
    """
    parts = []
    append = parts.append
    for name, value in sorted(attrs.items()):
        if xml:
            append('%s="%s"' % (name, _quote(value)))
        else:
            # this is a little bogus, but should do for now
            if name == value and isnmtoken(value):
                # attribute whose value equals its name: write the bare value
                append(value)
            elif istoken(value):
                if value == "no" + name:
                    # "no<name>" is likewise written as the bare value
                    append(value)
                else:
                    append("%s=%s" % (name, value))
            else:
                append('%s="%s"' % (name, _quote(value)))
    return "".join([" " + part for part in parts])


def _quote(value):
    """Escape `value` for use between double quotes in a tag."""
    # A literal double quote would end the attribute value early; a
    # numeric character reference needs no entity declaration.  This is a
    # no-op if escape() has already dealt with the quotes.
    return escape(value).replace('"', "&#34;")
Fred Drake30a68c71998-11-23 16:59:39 +000069
70
# \Z anchors at the very end of the string; "$" would also match just
# before a trailing newline and so accept values such as "foo\n".
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)
def isnmtoken(s):
    """Return true if `s` is a letter followed only by letters, digits,
    '-', '.' or '_' (case-insensitive), with nothing else after it."""
    return _nmtoken_rx.match(s) is not None
74
# \Z anchors at the very end of the string; "$" would also match just
# before a trailing newline and so accept values such as "10\n".
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)
def istoken(s):
    """Return true if `s` is a letter or digit followed only by letters,
    digits, '-', '.' or '_' (case-insensitive), with nothing else after it."""
    return _token_rx.match(s) is not None
78
79
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Translate the ESIS events read from `ifp` into markup written to `ofp`.

    ifp       -- file object read line by line; the first character of each
                 line selects the event, the rest is its data
    ofp       -- file object the markup is written to
    xml       -- true to write XML (empty elements as <gi/>, all others
                 closed by name); false to write SGML
    autoclose -- element names whose end tags are omitted in SGML output;
                 ignored when `xml` is true
    verbatims -- element names after whose start tag "---" is left as is
                 (until the next start or end tag) instead of being turned
                 into an em-dash entity reference

    Raises RuntimeError on an unrecognized event character, and ValueError
    if an attribute event does not consist of three space-separated fields.
    When LIST_EMPTIES is true the names of the empty elements seen are
    passed to dump_empty_element_names() at the end.
    """
    if xml:
        autoclose = ()
    attrs = {}              # attributes collected for the next start tag
    lastopened = None       # most recently opened element, for "</>"
    knownempties = []       # every element name that was flagged empty
    knownempty = 0          # the next start tag was flagged empty
    lastempty = 0           # the most recent start tag was flagged empty
    inverbatim = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        event = line[0]
        data = line[1:]
        if data[-1:] == "\n":
            data = data[:-1]
        if event == "-":
            # character data
            data = esistools.decode(data)
            data = escape(data)
            if not inverbatim:
                # Done after escape() so the ampersand of the entity
                # reference is not itself escaped.
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif event == "(":
            # start of element
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            # end of element
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # an empty element was already closed by its "<gi/>" tag
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    # nothing but same-line text since the start tag:
                    # use the short end tag
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            # attribute for the next start tag: "name type value"
            name, _attrtype, value = data.split(" ", 2)
            name = map_gi(name, _attr_map)
            attrs[name] = esistools.decode(value)
        elif event == "e":
            # the next element is empty
            knownempty = 1
        elif event == "&":
            # entity reference, passed through by name
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)
157
158
def dump_empty_element_names(knownempties, filename=None):
    """Merge the element names in `knownempties` into the empties file.

    knownempties -- sequence of element names; it is not modified
    filename     -- file to update; defaults to EMPTIES_FILENAME

    Names already listed in the file (one per line, surrounding whitespace
    and blank lines ignored) are kept.  The file is then rewritten with the
    union of both sets, sorted, one name per line.
    """
    if filename is None:
        filename = EMPTIES_FILENAME
    names = set(knownempties)
    if os.path.isfile(filename):
        # Close the read handle before the file is reopened for writing.
        with open(filename) as fp:
            for line in fp:
                gi = line.strip()
                if gi:
                    names.add(gi)
    with open(filename, "w") as fp:
        fp.write("\n".join(sorted(names)))
        fp.write("\n")
Fred Drake36dfe581999-01-19 23:03:04 +0000179
Fred Drake30a68c71998-11-23 16:59:39 +0000180
def update_gi_map(map, names, fromsgml=1):
    """Record the preferred spelling of each name in `map`.

    map      -- dictionary to update in place
    names    -- either a comma-separated string of names or a sequence of
                names; empty names are skipped
    fromsgml -- when true the key is the lower-cased name, otherwise the
                name itself

    Each entry maps the key to the name exactly as given.
    """
    if isinstance(names, str):
        names = names.split(",")
    for name in names:
        if not name:
            # produced by an empty string or by doubled/trailing commas
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
Fred Drake30a68c71998-11-23 16:59:39 +0000188
189
def main():
    """Run the conversion from the command line.

    Arguments: [options] [infile [outfile]]; standard input and standard
    output are used for files that are not named.

    Options:
      -a NAMES, --autoclose=NAMES  comma-separated element names passed to
                                   convert() as `autoclose`
      -d, --declare                write an XML declaration first (only
                                   when generating XML)
      -e                           set LIST_EMPTIES
      -s, --sgml                   generate SGML
      -x, --xml                    generate XML (the default)
      --elements-map=NAMES
      --attributes-map=NAMES
      --values-map=NAMES           comma-separated names, spelled the way
                                   they should appear in the output; may be
                                   repeated

    Exits with status 2 if more than two file arguments are given.
    """
    import getopt
    import sys

    global LIST_EMPTIES, _normalize_case, map_gi

    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_names = []
    attr_names = []
    value_names = []
    verbatims = ('verbatim', 'interactive-session')
    # "a:" and the trailing "=" mark the options that take an argument.
    opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                               ["autoclose=", "declare", "sgml", "xml",
                                "elements-map=", "attributes-map=",
                                "values-map="])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_names.append(arg)
        elif opt == "--attributes-map":
            attr_names.append(arg)
        elif opt == "--values-map":
            value_names.append(arg)
    #
    # open input streams:
    #
    if len(args) > 2:
        sys.stderr.write(
            "usage: %s [-a names] [-d] [-e] [-s | -x]"
            " [--elements-map=names] [--attributes-map=names]"
            " [--values-map=names] [infile [outfile]]\n" % sys.argv[0])
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if args:
        ifp = open(args[0])
    if len(args) == 2:
        ofp = open(args[1], "w")
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = str.lower
        # The names are handed over as one comma-separated string each.
        update_gi_map(_elem_map, ",".join(elem_names))
        update_gi_map(_attr_map, ",".join(attr_names))
        update_gi_map(_token_map, ",".join(value_names))
    else:
        map_gi = null_map_gi
    #
    # run the conversion:
    #
    try:
        try:
            if xml and xmldecl:
                ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
            convert(ifp, ofp, xml=xml, autoclose=autoclose,
                    verbatims=verbatims)
        except IOError as err:
            # a reader that closed the pipe early is not an error
            if err.errno != errno.EPIPE:
                raise
    finally:
        # only close what was opened here
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()


if __name__ == "__main__":
    main()