blob: 10ec83ac6a3384b61c5d060973d8583c85cc3eb3 [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Convert ESIS events to SGML or XML markup.
4
5This is limited, but seems sufficient for the ESIS generated by the
6latex2esis.py script when run over the Python documentation.
7"""
Fred Drake607aed71999-02-18 16:30:16 +00008
9# This should have an explicit option to indicate whether the *INPUT* was
10# generated from an SGML or an XML application.
11
Fred Drake30a68c71998-11-23 16:59:39 +000012import errno
Fred Drake36dfe581999-01-19 23:03:04 +000013import os
Fred Drake30a68c71998-11-23 16:59:39 +000014import re
15import string
16
Fred Drakea4699a72001-03-23 16:38:12 +000017from xml.sax.saxutils import escape
Fred Drake79ad1f11999-01-14 17:06:09 +000018
Fred Drake2314a042002-10-16 16:06:07 +000019import esistools
20
Fred Drake30a68c71998-11-23 16:59:39 +000021
# Default for the element names whose end tags convert() leaves out of
# SGML output; main() replaces it when an autoclose option is given.
AUTOCLOSE = ()

# File that dump_empty_element_names() reads and rewrites.
EMPTIES_FILENAME = "../sgml/empties.dat"
# When true, convert() calls dump_empty_element_names() after the run;
# main() sets it for the -e option.
LIST_EMPTIES = 0
26
27
# Name tables keyed by the case-normalized name, holding the spelling to
# emit on output.  convert() consults _elem_map and _attr_map via map_gi().
_elem_map = {}
_attr_map = {}
_token_map = {}
31
# Callable applied to a name before it is looked up in a name table.
# main() rebinds this to a lower-casing function for SGML-derived input.
_normalize_case = str


def map_gi(sgmlgi, map):
    """Return the output spelling for the name *sgmlgi*.

    The name is normalized with ``_normalize_case`` and looked up in
    *map*.  A name that has not been seen before is recorded in *map*
    under its normalized form and returned unchanged, so later
    occurrences are spelled the same way as the first one.
    """
    uncased = _normalize_case(sgmlgi)
    try:
        return map[uncased]
    except KeyError:
        # A missing dict key raises KeyError (not IndexError); remember
        # the first spelling we encounter.
        map[uncased] = sgmlgi
        return sgmlgi
41
def null_map_gi(sgmlgi, map):
    """Identity stand-in for map_gi(): return *sgmlgi* and ignore *map*."""
    return sgmlgi
44
45
def format_attrs(attrs, xml=0):
    """Return the attributes in *attrs* formatted for a start tag.

    *attrs* maps attribute names to string values.  Attributes are
    emitted in sorted name order, each preceded by a single space, so
    the result can be placed directly after the element name; an empty
    mapping gives an empty string.

    With *xml* true every attribute is written as ``name="value"``.
    Otherwise the shorter SGML forms are used where the value allows:
    the bare value when it equals the name or is ``"no" + name``, and
    an unquoted ``name=value`` for other simple tokens.
    """
    # escape() leaves '"' alone by default, which would terminate the
    # quoted value early.  XML predefines &quot;; for SGML a numeric
    # character reference is used because it needs no entity declaration.
    if xml:
        quote_entities = {'"': "&quot;"}
    else:
        quote_entities = {'"': "&#34;"}
    parts = []
    append = parts.append
    for name, value in sorted(attrs.items()):
        if xml:
            append('%s="%s"' % (name, escape(value, quote_entities)))
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            append(value)
        elif istoken(value):
            if value == "no" + name:
                append(value)
            else:
                append("%s=%s" % (name, value))
        else:
            append('%s="%s"' % (name, escape(value, quote_entities)))
    if parts:
        # An empty first item makes join() produce the leading space.
        parts.insert(0, '')
    return " ".join(parts)
Fred Drake30a68c71998-11-23 16:59:39 +000067
68
_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)


def isnmtoken(s):
    """Tell whether *s* is a letter followed only by letters, digits,
    hyphens, periods or underscores (case-insensitive)."""
    return bool(_nmtoken_rx.match(s))
72
_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)


def istoken(s):
    """Tell whether *s* is a letter or digit followed only by letters,
    digits, hyphens, periods or underscores (case-insensitive)."""
    return bool(_token_rx.match(s))
76
77
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
    """Read ESIS events from *ifp* and write the matching markup to *ofp*.

    Each input line is one event; its first character selects the kind:

    ``-``  character data        ``(``  element start
    ``)``  element end           ``A``  attribute for the next start tag
    ``e``  next element is empty ``&``  entity reference

    The pseudo-element ``COMMENT`` is written as comment delimiters.
    *xml* selects XML output (empty-element tags, no omitted or short end
    tags) and disables *autoclose*, the names whose end tags are left out
    of SGML output.  Elements named in *verbatims* keep ``---`` in their
    character data untouched.

    Raises RuntimeError for an unknown event character.
    """
    if xml:
        autoclose = ()
    pending_attrs = {}
    last_opened = None
    known_empties = []
    next_is_empty = False
    last_was_empty = False
    in_verbatim = False
    while True:
        line = ifp.readline()
        if not line:
            break

        event = line[0]
        payload = line[1:]
        if payload.endswith("\n"):
            payload = payload[:-1]

        if event == "-":
            text = escape(esistools.decode(payload))
            if not in_verbatim:
                text = text.replace("---", "—")
            ofp.write(text)
            if "\n" in text:
                last_opened = None
            next_is_empty = False
            last_was_empty = False
        elif event == "(":
            if payload == "COMMENT":
                ofp.write("<!--")
                continue
            gi = map_gi(payload, _elem_map)
            tag_close = "/>" if (next_is_empty and xml) else ">"
            ofp.write("<%s%s%s"
                      % (gi, format_attrs(pending_attrs, xml), tag_close))
            if next_is_empty and gi not in known_empties:
                # accumulate knowledge!
                known_empties.append(gi)
            # The collected attributes belonged to this start tag only.
            pending_attrs = {}
            last_opened = gi
            last_was_empty = next_is_empty
            next_is_empty = False
            in_verbatim = gi in verbatims
        elif event == ")":
            if payload == "COMMENT":
                ofp.write("-->")
                continue
            gi = map_gi(payload, _elem_map)
            if xml:
                # An empty element was already closed by its "/>" tag.
                if not last_was_empty:
                    ofp.write("</%s>" % gi)
            elif gi not in known_empties and gi not in autoclose:
                # Use the short end tag when closing the element that
                # was opened most recently.
                if last_opened == gi:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % gi)
            last_opened = None
            last_was_empty = False
            in_verbatim = False
        elif event == "A":
            # The middle field of the attribute event is not used.
            attr_name, _declared, attr_value = payload.split(" ", 2)
            attr_name = map_gi(attr_name, _attr_map)
            pending_attrs[attr_name] = esistools.decode(attr_value)
        elif event == "e":
            next_is_empty = True
        elif event == "&":
            ofp.write("&%s;" % payload)
            next_is_empty = False
        else:
            raise RuntimeError("unrecognized ESIS event type: '%s'" % event)

    if LIST_EMPTIES:
        dump_empty_element_names(known_empties)
155
156
def dump_empty_element_names(knownempties, filename=None):
    """Merge *knownempties* into the file listing empty element names.

    The names already stored in the file (one per line, blank lines
    ignored) are combined with *knownempties*, and the file is rewritten
    with the sorted union, one name per line.  The file is created if it
    does not exist.

    *filename* defaults to the module-level EMPTIES_FILENAME.  The
    *knownempties* sequence itself is left unmodified.
    """
    if filename is None:
        filename = EMPTIES_FILENAME
    names = set(knownempties)
    if os.path.isfile(filename):
        # Close the read handle before the file is reopened for writing.
        with open(filename) as fp:
            for line in fp:
                gi = line.strip()
                if gi:
                    names.add(gi)
    with open(filename, "w") as fp:
        fp.write("\n".join(sorted(names)))
        fp.write("\n")
Fred Drake36dfe581999-01-19 23:03:04 +0000176
Fred Drake30a68c71998-11-23 16:59:39 +0000177
def update_gi_map(map, names, fromsgml=1):
    """Record the output spelling of each name in *names* in *map*.

    *names* is either a comma-separated string or an iterable of names;
    empty names are skipped.  With *fromsgml* true the lower-cased name
    is used as the key, otherwise the name itself is.
    """
    if isinstance(names, str):
        names = names.split(",")
    for name in names:
        if not name:
            # "".split(",") yields [""]; do not register an empty name.
            continue
        if fromsgml:
            uncased = name.lower()
        else:
            uncased = name
        map[uncased] = name
Fred Drake30a68c71998-11-23 16:59:39 +0000185
186
def main():
    """Command-line entry point.

    Options:
      -a NAMES, --autoclose=NAMES  comma-separated elements whose end
                                   tags are omitted in SGML output
      -d, --declare                write an XML declaration first
      -e                           update the empty-elements file
      -s, --sgml                   produce SGML
      -x, --xml                    produce XML (the default)
      --elements-map=NAMES, --attributes-map=NAMES, --values-map=NAMES
                                   comma-separated names giving the
                                   spelling to use on output; may repeat

    Up to two positional arguments name the input and output files;
    standard input and standard output are used for those not given.
    """
    import getopt
    import sys

    global LIST_EMPTIES, _normalize_case, map_gi

    autoclose = AUTOCLOSE
    xml = 1
    xmldecl = 0
    elem_list = []
    attr_list = []
    value_list = []
    verbatims = ('verbatim', 'interactive-session')
    # "-a" and "--attributes-map" both carry a value, so they need the
    # ":" / "=" markers just like their siblings.
    opts, args = getopt.getopt(sys.argv[1:], "a:desx",
                               ["autoclose=", "declare", "sgml", "xml",
                                "elements-map=", "attributes-map=",
                                "values-map="])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt == "-e":
            LIST_EMPTIES = 1
        elif opt in ("-s", "--sgml"):
            xml = 0
        elif opt in ("-x", "--xml"):
            xml = 1
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
        elif opt == "--elements-map":
            elem_list.append(arg)
        elif opt == "--attributes-map":
            attr_list.append(arg)
        elif opt == "--values-map":
            value_list.append(arg)
    # Repeated options accumulate into one comma-separated string.
    elem_names = ",".join(elem_list)
    attr_names = ",".join(attr_list)
    value_names = ",".join(value_list)
    #
    # open input streams:
    #
    if len(args) == 0:
        ifp = sys.stdin
        ofp = sys.stdout
    elif len(args) == 1:
        ifp = open(args[0])
        ofp = sys.stdout
    elif len(args) == 2:
        ifp = open(args[0])
        ofp = open(args[1], "w")
    else:
        sys.stderr.write("usage: %s [options] [infile [outfile]]\n"
                         % sys.argv[0])
        sys.exit(2)
    #
    # setup the name maps:
    #
    if elem_names or attr_names or value_names:
        # assume the origin was SGML; ignore case of the names from the ESIS
        # stream but set up conversion tables to get the case right on output
        _normalize_case = str.lower
        update_gi_map(_elem_map, elem_names)
        update_gi_map(_attr_map, attr_names)
        update_gi_map(_token_map, value_names)
    else:
        # No maps requested: make name mapping a no-op.
        map_gi = null_map_gi
    #
    # run the conversion:
    #
    try:
        if xml and xmldecl:
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
    except IOError as e:
        # A reader closing the pipe early is not an error worth reporting.
        if e.errno != errno.EPIPE:
            raise
260
261
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()