blob: b6f9a447537cb2a81d492bafc057a256490e0b36 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Convert ESIS events to SGML or XML markup.
4
5This is limited, but seems sufficient for the ESIS generated by the
6latex2esis.py script when run over the Python documentation.
7"""
Fred Drake607aed71999-02-18 16:30:16 +00008
9# This should have an explicit option to indicate whether the *INPUT* was
10# generated from an SGML or an XML application.
11
Fred Drake30a68c71998-11-23 16:59:39 +000012import errno
Fred Drake36dfe581999-01-19 23:03:04 +000013import os
Fred Drake30a68c71998-11-23 16:59:39 +000014import re
15import string
16
Fred Drakea4699a72001-03-23 16:38:12 +000017from xml.sax.saxutils import escape
Fred Drake79ad1f11999-01-14 17:06:09 +000018
Fred Drake2314a042002-10-16 16:06:07 +000019import esistools
20
Fred Drake30a68c71998-11-23 16:59:39 +000021
# Default for main()'s autoclose list: element names whose end tags
# convert() leaves out when writing SGML (never used for XML output).
AUTOCLOSE = tuple()

# When true (set by the -e option), convert() records the empty elements it
# saw by calling dump_empty_element_names().
LIST_EMPTIES = 0

# File that dump_empty_element_names() merges the empty-element names into.
EMPTIES_FILENAME = "../sgml/empties.dat"
26
27
# Case-restoring tables: each maps the normalized spelling of a name to the
# spelling that should appear in the output.  _elem_map and _attr_map are
# consulted by convert() through map_gi().
# NOTE(review): _token_map looks intended for attribute token values
# (main()'s --values-map option) -- confirm against main().
_elem_map = {}
_attr_map = {}
_token_map = {}

# Function that reduces a name to its lookup key.  The default leaves the
# name unchanged; main() rebinds it to a lower-casing function when name
# maps are supplied on the command line.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling of the name *sgmlgi* according to *map*.

    The name is normalized with _normalize_case() and looked up in *map*.
    A name that has no entry yet is registered under its normalized key with
    the spelling given, so that spelling is reused for later occurrences,
    and is returned unchanged.
    """
    # A missing dictionary key raises KeyError (not IndexError), which
    # setdefault() handles for us: it stores the spelling only when absent.
    return map.setdefault(_normalize_case(sgmlgi), sgmlgi)
41
def null_map_gi(sgmlgi, map):
    """Identity stand-in for map_gi(): hand back *sgmlgi* untouched.

    *map* is accepted only so the signature matches map_gi(); it is neither
    read nor modified.  main() installs this function in place of map_gi()
    when no name maps were requested.
    """
    unchanged = sgmlgi
    return unchanged
44
45
def format_attrs(attrs, xml=0):
    """Render the attribute dictionary *attrs* as markup text.

    Attributes are written sorted by name, each one preceded by a single
    space, so the result can be appended directly after an element name.
    An empty dictionary yields the empty string.

    With *xml* true every attribute is written as name="value".  Otherwise
    the SGML short forms are used where the value allows it:

    * a value equal to its own attribute name (and a name token) is
      written as the bare value;
    * a token value equal to "no" + name is written as the bare value;
    * any other token value is written unquoted as name=value;
    * everything else is written as name="value".

    Quoted values are escaped with escape(), and a double quote inside the
    value is replaced by a reference so it cannot terminate the literal.
    """
    if xml:
        # &quot; is predefined in XML.
        quote = {'"': "&quot;"}
    else:
        # A numeric character reference needs no entity declaration.
        quote = {'"': "&#34;"}
    parts = []
    # sorted() works for both list-returning and view-returning items().
    for name, value in sorted(attrs.items()):
        if xml:
            parts.append('%s="%s"' % (name, escape(value, quote)))
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            parts.append(value)
        elif istoken(value):
            if value == "no" + name:
                parts.append(value)
            else:
                parts.append("%s=%s" % (name, value))
        else:
            parts.append('%s="%s"' % (name, escape(value, quote)))
    return "".join([" " + part for part in parts])
Fred Drake30a68c71998-11-23 16:59:39 +000068
69
# A letter followed by any run of letters, digits, '-', '.' or '_',
# matched case-insensitively up to the end of the string.
_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.I)


def isnmtoken(s):
    """Tell whether *s* matches _nmtoken_rx, i.e. is a letter-initial token.

    Returns True or False.  format_attrs() uses this to decide whether a
    value may be written bare in SGML output.
    """
    return bool(_nmtoken_rx.match(s))
73
# Like _nmtoken_rx, except that the first character may also be a digit.
_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.I)


def istoken(s):
    """Tell whether *s* matches _token_rx (letter- or digit-initial token).

    Returns True or False.  format_attrs() uses this to decide whether an
    attribute value can be written without quotes in SGML output.
    """
    return bool(_token_rx.match(s))
77
78
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Translate the ESIS event stream on *ifp* into markup written to *ofp*.

    Each input line is one event; its first character selects the kind:

    "-"  character data: decoded, escaped and written out
    "("  start of an element (or of a comment when the name is COMMENT)
    ")"  end of an element (or of a comment)
    "A"  attribute "name type value" for the next start tag
    "e"  the next element to be opened is empty
    "&"  entity reference, written as &name;

    Arguments:
    ifp -- object with a readline() method supplying the events
    ofp -- object with a write() method receiving the markup
    xml -- true to write XML (empty elements as <name/>, every other
           element closed explicitly); false to write SGML
    autoclose -- element names whose end tags are left out of SGML output;
           ignored when *xml* is true
    verbatims -- element names inside which "---" is left untouched
           instead of being turned into an em dash

    Raises RuntimeError for an unknown event type.  When LIST_EMPTIES is
    true the names of the empty elements seen are handed to
    dump_empty_element_names() once the input is exhausted.
    """
    if xml:
        # Every non-empty element gets an explicit end tag in XML output.
        autoclose = ()
    attrs = {}              # attributes collected for the next start tag
    lastopened = None       # most recently opened element, if still current
    knownempties = []       # names of all elements seen flagged as empty
    knownempty = 0          # an "e" event is pending for the next start tag
    lastempty = 0           # the element opened last was flagged empty
    inverbatim = 0          # the element opened last is listed in verbatims
    while True:
        line = ifp.readline()
        if not line:
            break

        event = line[0]
        data = line[1:]
        if data.endswith("\n"):
            data = data[:-1]

        if event == "-":
            data = escape(esistools.decode(data))
            if not inverbatim:
                # Inserted after escape() so the "&" survives; the entity
                # keeps the output ASCII (main() declares iso8859-1, which
                # has no em dash character).
                data = data.replace("---", "&mdash;")
            ofp.write(data)
            if "\n" in data:
                lastopened = None
                knownempty = 0
                lastempty = 0
        elif event == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            data = map_gi(data, _elem_map)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
            inverbatim = data in verbatims
        elif event == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            data = map_gi(data, _elem_map)
            if xml:
                # An empty element was already closed by its <name/> tag.
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    # Closing the element that was opened last: the short
                    # end tag is enough.
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
            inverbatim = 0
        elif event == "A":
            # The declared attribute type (middle field) is not needed.
            name, _attrtype, value = data.split(" ", 2)
            attrs[map_gi(name, _attr_map)] = esistools.decode(value)
        elif event == "e":
            knownempty = 1
        elif event == "&":
            ofp.write("&%s;" % data)
            knownempty = 0
        else:
            raise RuntimeError(
                "unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(knownempties)
156
157
def dump_empty_element_names(knownempties, filename=None):
    """Merge the element names in *knownempties* into the empties file.

    Names already listed in the file (one per line, blank lines ignored)
    are kept; the file is then rewritten with the union of both sets,
    sorted, one name per line.  The file is created if it does not exist.

    Arguments:
    knownempties -- iterable of element names; it is not modified
    filename -- file to update; defaults to EMPTIES_FILENAME
    """
    if filename is None:
        filename = EMPTIES_FILENAME
    names = set(knownempties)
    if os.path.isfile(filename):
        # Read and close before reopening the same file for writing.
        with open(filename) as fp:
            for line in fp:
                gi = line.strip()
                if gi:
                    names.add(gi)
    with open(filename, "w") as fp:
        fp.write("\n".join(sorted(names)))
        fp.write("\n")
Fred Drake36dfe581999-01-19 23:03:04 +0000178
Fred Drake30a68c71998-11-23 16:59:39 +0000179
def update_gi_map(map, names, fromsgml=1):
    """Register the output spelling of each name in *names* in *map*.

    Arguments:
    map -- dictionary to update in place
    names -- either a comma-separated string of names or a sequence of
             names; empty names are ignored
    fromsgml -- when true (the default) each name is stored under its
             lower-cased form, so lookups can ignore case; otherwise it is
             stored under its exact spelling
    """
    if hasattr(names, "split"):
        # A single comma-separated string rather than a sequence of names.
        names = names.split(",")
    for name in names:
        if not name:
            # "".split(",") yields [""]; don't register a bogus entry.
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
Fred Drake30a68c71998-11-23 16:59:39 +0000187
188
def main():
    """Command-line entry point: parse options, open the streams, convert.

    Options:
    -d, --declare           write an XML declaration first (XML output only)
    -e                      record the empty elements seen (LIST_EMPTIES)
    -s, --sgml              write SGML
    -x, --xml               write XML (the default)
    -a, --autoclose=NAMES   comma-separated elements whose end tags are
                            omitted in SGML output
    --elements-map=NAMES    comma-separated element names in output spelling
    --attributes-map=NAMES  the same for attribute names
    --values-map=NAMES      the same for attribute token values

    Up to two positional arguments name the input and output files;
    standard input and standard output are used for those not given.
    Exits with status 2 on a command-line error.  A broken pipe while
    writing is ignored; any other I/O error propagates.
    """
    import getopt
    import sys

    global LIST_EMPTIES, _normalize_case, map_gi

    def usage():
        sys.stderr.write(
            "usage: %s [-adesx] [--autoclose=names] [--declare] [--sgml]"
            " [--xml] [--elements-map=names] [--attributes-map=names]"
            " [--values-map=names] [input [output]]\n"
            % os.path.basename(sys.argv[0]))

    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    # Each map option may be repeated; collect the arguments and join them
    # afterwards instead of slicing a growing string.
    elem_args = []
    attr_args = []
    value_args = []
    verbatims = ('verbatim', 'interactive-session')
    try:
        # NOTE(review): the short form -a takes no argument here, so it
        # yields an autoclose list holding one empty name -- confirm intent.
        opts, args = getopt.getopt(sys.argv[1:], "adesx",
                                   ["autoclose=", "declare", "sgml", "xml",
                                    "elements-map=", "attributes-map=",
                                    "values-map="])
    except getopt.GetoptError as e:
        sys.stderr.write("%s\n" % e)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_args.append(arg)
        elif opt == "--attributes-map":
            attr_args.append(arg)
        elif opt == "--values-map":
            value_args.append(arg)
    if len(args) > 2:
        usage()
        sys.exit(2)
    elem_names = ",".join(elem_args)
    attr_names = ",".join(attr_args)
    value_names = ",".join(value_args)
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = str.lower
        # update_gi_map() does the comma splitting itself.
        update_gi_map(_elem_map, elem_names)
        update_gi_map(_attr_map, attr_names)
        update_gi_map(_token_map, value_names)
    else:
        map_gi = null_map_gi
    #
    # open the streams and run the conversion:
    #
    ifp = sys.stdin
    ofp = sys.stdout
    try:
        if args:
            ifp = open(args[0])
        if len(args) == 2:
            ofp = open(args[1], "w")
        try:
            if xml and xmldecl:
                ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
            convert(ifp, ofp, xml=xml, autoclose=autoclose,
                    verbatims=verbatims)
        except IOError as e:
            # The reader going away (e.g. output piped into "head") is not
            # an error worth reporting.
            if e.errno != errno.EPIPE:
                raise
    finally:
        # Close only what was opened here, never the standard streams.
        if ifp is not sys.stdin:
            ifp.close()
        if ofp is not sys.stdout:
            ofp.close()
261
262
# Run the converter only when this file is executed as a script; importing
# it as a module just defines the functions above.
if __name__ == "__main__":
    main()