#! /usr/bin/env python

"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""
8__version__ = '$Revision$'
9
10import errno
Fred Drakef077b9d1998-12-01 19:01:53 +000011import esistools
Fred Drake36dfe581999-01-19 23:03:04 +000012import os
Fred Drake30a68c71998-11-23 16:59:39 +000013import re
14import string
15
Fred Drake79ad1f11999-01-14 17:06:09 +000016from xml.utils import escape
17
Fred Drake30a68c71998-11-23 16:59:39 +000018
# File that do_convert() writes the names of elements seen flagged as
# empty to; the path is relative to the current working directory.
EMPTIES_FILENAME = "../sgml/empties.dat"
# When true, do_convert() records the empty-element names in the file above.
LIST_EMPTIES = 0
21
22
def format_attrs(attrs, xml=0):
    """Return the attribute portion of a start tag for the mapping attrs.

    Attributes are emitted in sorted name order, each preceded by a
    single space; an empty mapping yields the empty string.

    When xml is true every attribute is written as name="value".
    Otherwise SGML minimization is applied where possible:

    * a value equal to its own name (and a valid name token) is written
      as the bare value;
    * a token value equal to "no" + name is written as the bare value;
    * any other token value is written unquoted as name=value;
    * everything else is written as name="value".
    """
    parts = []
    # sorted() works on both a Python 2 list and a Python 3 dict view.
    for name, value in sorted(attrs.items()):
        if not xml:
            # this is a little bogus, but should do for now
            if name == value and isnmtoken(value):
                parts.append(value)
                continue
            if istoken(value):
                if value == "no" + name:
                    parts.append(value)
                else:
                    parts.append("%s=%s" % (name, value))
                continue
        # NOTE(review): escape() comes from xml.utils and its handling of
        # double quotes is not visible here.  A literal '"' would end the
        # attribute value early, so replace any that remain with a numeric
        # character reference (a no-op if escape() already dealt with them).
        quoted = escape(value).replace('"', "&#34;")
        parts.append('%s="%s"' % (name, quoted))
    return "".join([" " + part for part in parts])
42
43
# A letter followed by letters, digits, '-', '.' or '_'.  Anchored with \Z
# rather than $ because $ would also match just before a trailing newline.
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)


def isnmtoken(s):
    """Return true if s consists solely of a letter followed by any number
    of letters, digits, hyphens, periods or underscores."""
    return _nmtoken_rx.match(s) is not None
47
# Like the name-token pattern, but the first character may also be a digit.
# Anchored with \Z rather than $ because $ would also match just before a
# trailing newline.
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)


def istoken(s):
    """Return true if s consists solely of a letter or digit followed by
    any number of letters, digits, hyphens, periods or underscores."""
    return _token_rx.match(s) is not None
51
52
def do_convert(ifp, ofp, xml=0, autoclose=()):
    """Translate the ESIS event stream read from ifp into markup on ofp.

    ifp must provide readline() and ofp must provide write().

    When xml is true, XML is written: an element flagged as empty is
    emitted as <name/> with no end tag, and autoclose is ignored.
    Otherwise SGML is written: end tags are omitted for elements that
    have been seen flagged as empty and for elements named in autoclose,
    and the short end tag </> is used when the element being closed is
    the one opened last (with no end tag or newline-bearing data since).

    Each input line is one event, selected by its first character:

      -  character data (decoded, escaped and written out)
      (  start of an element; "(COMMENT" opens a comment instead
      )  end of an element; ")COMMENT" closes a comment instead
      A  attribute for the next start tag: "name type value"
      e  flags the next element opened as empty

    Lines of any other kind are ignored.  If the module-level
    LIST_EMPTIES flag is true, the names of the elements seen flagged as
    empty are written to EMPTIES_FILENAME, one per line, appending when
    that file already exists.
    """
    if xml:
        autoclose = ()
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        # "kind" rather than "type" so the builtin is not shadowed.
        kind = line[0]
        data = line[1:]
        if data[-1:] == "\n":
            data = data[:-1]
        if kind == "-":
            data = esistools.decode(data)
            ofp.write(escape(data))
            if "\n" in data:
                # Once a line break has been written, the short end tag
                # is no longer used for the element opened last.
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif kind == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
            else:
                ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
        elif kind == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            if xml:
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if data in autoclose:
                    pass
                elif lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
        elif kind == "A":
            fields = data.split(" ", 2)
            if len(fields) == 2 and fields[1] == "IMPLIED":
                # An implied attribute carries no value: nothing to emit.
                continue
            # Any other malformed attribute line still raises ValueError.
            name, _decl_type, value = fields
            attrs[name] = esistools.decode(value)
        elif kind == "e":
            knownempty = 1

    if LIST_EMPTIES:
        if os.path.isfile(EMPTIES_FILENAME):
            mode = "a"
        else:
            mode = "w"
        # The extra empty item gives every name a terminating newline
        # (and writes nothing at all when no names were collected).
        text = "\n".join(knownempties + [""])
        fp = open(EMPTIES_FILENAME, mode)
        try:
            fp.write(text)
        finally:
            fp.close()
123
Fred Drake30a68c71998-11-23 16:59:39 +0000124
def sgml_convert(ifp, ofp, autoclose):
    """Convert the ESIS stream on ifp to SGML on ofp.

    autoclose lists the element names whose end tags are omitted.
    Delegates to do_convert() with XML output switched off.
    """
    result = do_convert(ifp, ofp, autoclose=autoclose, xml=0)
    return result
Fred Drake30a68c71998-11-23 16:59:39 +0000127
128
def xml_convert(ifp, ofp, autoclose):
    """Convert the ESIS stream on ifp to XML on ofp.

    Delegates to do_convert() with XML output switched on; do_convert()
    discards autoclose in that mode, but it is accepted so both
    converters share one signature.
    """
    result = do_convert(ifp, ofp, autoclose=autoclose, xml=1)
    return result
131
132
# Default element names passed as "autoclose" by main(): do_convert()
# writes no end tag for these in SGML output.
AUTOCLOSE = (
    "para",
    "term",
)
Fred Drake30a68c71998-11-23 16:59:39 +0000134
135
def main():
    """Command-line driver: convert an ESIS stream to SGML (default) or XML.

    Options:
      -x, --xml              write XML instead of SGML
      -d, --declare          with --xml, write an XML declaration first
      -a, --autoclose=NAMES  comma-separated element names whose end tags
                             are omitted in SGML output (default: AUTOCLOSE)

    With no arguments, input is stdin and output is stdout; one argument
    names the input file; two arguments name the input and output files.
    Bad options or more than two arguments print a usage message on
    stderr and exit with status 2.
    """
    import getopt
    import sys

    usage = ("usage: %s [-d|--declare] [-x|--xml] "
             "[-a names|--autoclose=names] [infile [outfile]]\n"
             % sys.argv[0])
    autoclose = AUTOCLOSE
    convert = sgml_convert
    xml = 0
    xmldecl = 0
    try:
        # "a:" / "autoclose=": the option carries the list of names; without
        # the argument markers getopt always handed back an empty string.
        opts, args = getopt.getopt(sys.argv[1:], "a:dx",
                                   ["autoclose=", "declare", "xml"])
    except getopt.error as err:
        sys.stderr.write("%s\n%s" % (err, usage))
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt in ("-x", "--xml"):
            xml = 1
            convert = xml_convert
        elif opt in ("-a", "--autoclose"):
            autoclose = arg.split(",")
    if len(args) > 2:
        sys.stderr.write(usage)
        sys.exit(2)

    ifp = sys.stdin
    ofp = sys.stdout
    try:
        if len(args) >= 1:
            ifp = open(args[0])
        if len(args) == 2:
            ofp = open(args[1], "w")
        try:
            if xml and xmldecl:
                ofp.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            # knownempties is ignored in the XML version
            convert(ifp, ofp, autoclose)
        except IOError as err:
            # A reader that closed the pipe early is not an error.
            if err.errno != errno.EPIPE:
                raise
    finally:
        # Close only the files opened here, never the standard streams.
        if ofp is not sys.stdout:
            ofp.close()
        if ifp is not sys.stdin:
            ifp.close()
174
175
# Run the command-line driver only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()