blob: 07ca57117ba2a085e5ebc17c969dab91122b159b [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
3"""Convert ESIS events to SGML or XML markup.
4
5This is limited, but seems sufficient for the ESIS generated by the
6latex2esis.py script when run over the Python documentation.
7"""
8__version__ = '$Revision$'
9
10import errno
Fred Drakef077b9d1998-12-01 19:01:53 +000011import esistools
Fred Drake30a68c71998-11-23 16:59:39 +000012import re
13import string
14
Fred Drake79ad1f11999-01-14 17:06:09 +000015from xml.utils import escape
16
Fred Drake30a68c71998-11-23 16:59:39 +000017
def format_attrs(attrs, xml=0):
    """Render an attribute mapping as the text that follows a tag name.

    attrs maps attribute names to values.  Attributes are written in
    sorted (name, value) order, each preceded by a single space, so the
    result is either '' or starts with a space.

    When xml is true, every attribute is written as name="value" with
    the value passed through escape().  Otherwise (SGML) three forms are
    used: an attribute whose name equals its value and passes
    isnmtoken() is written as the bare value; a value passing istoken()
    is written unquoted as name=value; anything else is written quoted
    with the value passed through escape().
    """
    # sorted() yields the same ordering as items() followed by an
    # in-place sort(), but also works when items() returns a view
    # object that has no sort() method.
    pieces = []
    for name, value in sorted(attrs.items()):
        if xml:
            pieces.append(' %s="%s"' % (name, escape(value)))
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            pieces.append(" %s" % (value,))
        elif istoken(value):
            pieces.append(" %s=%s" % (name, value))
        else:
            pieces.append(' %s="%s"' % (name, escape(value)))
    return "".join(pieces)
34
35
# A letter followed by any number of letters, digits, '-', '.' or '_'
# (case-insensitive).  The trailing \Z forces match() to consume the
# whole string; without it any string with a valid *prefix* (such as
# "foo bar") would be accepted.
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)
def isnmtoken(s):
    """Return true if all of s is a letter followed by name characters."""
    return _nmtoken_rx.match(s) is not None
39
# A letter or digit followed by any number of letters, digits, '-', '.'
# or '_' (case-insensitive).  The trailing \Z forces match() to consume
# the whole string; without it any string with a valid *prefix* (such as
# "10 px") would be accepted.
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)
def istoken(s):
    """Return true if all of s is a letter or digit followed by name characters."""
    return _token_rx.match(s) is not None
43
44
def do_convert(ifp, ofp, xml=0):
    """Read ESIS events from ifp and write the equivalent markup to ofp.

    Every input line is one event, selected by its first character:

      "-"  character data; decoded, escaped and written out
      "("  element start tag (or "<!--" when the name is COMMENT)
      ")"  element end tag (or "-->" when the name is COMMENT)
      "A"  attribute for the next start tag, given as "name type value"
      "e"  marks the next element to be opened as empty

    Lines of any other type are skipped.

    With xml true, an element marked empty is written as <name/> and
    gets no end tag.  Otherwise SGML is written: elements that have been
    marked empty get no end tag, and an end tag naming the most recently
    opened element is abbreviated to "</>".  That "most recently opened"
    record is dropped after any end tag and after character data that
    contains a newline.
    """
    pending_attrs = {}      # attributes gathered for the next start tag
    open_name = None        # element that may still be closed with "</>"
    empty_names = []        # names of elements that were marked empty
    next_is_empty = 0       # an "e" event precedes the coming start tag
    prev_was_empty = 0      # the latest start tag was marked empty
    while 1:
        line = ifp.readline()
        if not line:
            break

        kind, data = line[0], line[1:]
        if data[-1:] == "\n":
            data = data[:-1]

        if kind == "e":
            next_is_empty = 1
        elif kind == "A":
            # The middle field (the declared type) is not used.
            name, _decl, value = data.split(" ", 2)
            pending_attrs[name] = esistools.decode(value)
        elif kind == "-":
            data = esistools.decode(data)
            ofp.write(escape(data))
            if "\n" in data:
                open_name = None
                next_is_empty = 0
                prev_was_empty = 0
        elif kind == "(":
            if data == "COMMENT":
                ofp.write("<!--")
            else:
                if xml and next_is_empty:
                    template = "<%s%s/>"
                else:
                    template = "<%s%s>"
                ofp.write(template % (data, format_attrs(pending_attrs, xml)))
                if next_is_empty and data not in empty_names:
                    # accumulate knowledge!
                    empty_names.append(data)
                pending_attrs = {}
                open_name = data
                prev_was_empty = next_is_empty
                next_is_empty = 0
        elif kind == ")":
            if data == "COMMENT":
                ofp.write("-->")
            else:
                if xml:
                    if not prev_was_empty:
                        ofp.write("</%s>" % data)
                elif data not in empty_names:
                    if open_name == data:
                        ofp.write("</>")
                    else:
                        ofp.write("</%s>" % data)
                open_name = None
                prev_was_empty = 0
101
102
def sgml_convert(ifp, ofp):
    """Convert the ESIS events read from ifp to SGML written to ofp."""
    return do_convert(ifp, ofp, 0)  # third argument is xml: 0 selects SGML
Fred Drake30a68c71998-11-23 16:59:39 +0000105
106
def xml_convert(ifp, ofp):
    """Convert the ESIS events read from ifp to XML written to ofp."""
    return do_convert(ifp, ofp, 1)  # third argument is xml: 1 selects XML
Fred Drake30a68c71998-11-23 16:59:39 +0000109
110
def main():
    """Command-line entry point: convert ESIS to SGML (default) or XML.

    Options:
      -x, --xml      write XML instead of SGML
      -d, --declare  together with --xml, write an XML declaration first

    Positional arguments: [infile [outfile]].  Input defaults to stdin
    and output to stdout.  More than two positional arguments prints a
    usage message to stderr and exits with status 2.

    An IOError whose errno is EPIPE (the reader of our output went away)
    ends the run quietly; any other IOError propagates.
    """
    import getopt
    import sys
    #
    convert = sgml_convert
    xml = 0
    xmldecl = 0
    opts, args = getopt.getopt(sys.argv[1:], "dx", ["declare", "xml"])
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt in ("-x", "--xml"):
            xml = 1
            convert = xml_convert
    if len(args) > 2:
        # No usage() helper exists in this module, so report inline.
        sys.stderr.write(
            "usage: %s [-d|--declare] [-x|--xml] [infile [outfile]]\n"
            % sys.argv[0])
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if args:
        ifp = open(args[0])
    if len(args) == 2:
        ofp = open(args[1], "w")
    # knownempties is ignored in the XML version
    try:
        if xml and xmldecl:
            ofp.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
        convert(ifp, ofp)
    except IOError as e:
        # Use the errno attribute rather than unpacking the exception:
        # an IOError does not always carry exactly two arguments.
        if e.errno != errno.EPIPE:
            raise
    finally:
        # Close only what was opened here; leave the std streams alone.
        if ofp is not sys.stdout:
            ofp.close()
        if ifp is not sys.stdin:
            ifp.close()
145
146
# Run the converter only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()