blob: b8050c85cc4cb3cd75986a569ca40a708271f5fd [file] [log] [blame]
Fred Drake30a68c71998-11-23 16:59:39 +00001#! /usr/bin/env python
2
"""Convert ESIS events to SGML or XML markup.

This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""
8__version__ = '$Revision$'
9
10import errno
Fred Drakef077b9d1998-12-01 19:01:53 +000011import esistools
Fred Drake36dfe581999-01-19 23:03:04 +000012import os
Fred Drake30a68c71998-11-23 16:59:39 +000013import re
14import string
15
Fred Drake79ad1f11999-01-14 17:06:09 +000016from xml.utils import escape
17
Fred Drake30a68c71998-11-23 16:59:39 +000018
# File to which do_convert() writes the names of the empty elements it
# encountered; only used when LIST_EMPTIES is true.
EMPTIES_FILENAME = "../sgml/empties.dat"
# Set to a true value to make do_convert() record empty element names.
LIST_EMPTIES = 0
21
22
def format_attrs(attrs, xml=0):
    """Render an attribute dictionary as the attribute text of a start tag.

    attrs maps attribute names to string values.  xml selects XML output
    when true and SGML output otherwise.

    Returns the empty string when attrs is empty; otherwise returns the
    attributes in sorted name order, each preceded by a single space.

    XML output always writes name="value" with the value passed through
    escape().  SGML output uses a shorter form when the value allows it:
    the bare value when name and value are equal and the value is a name
    token, or name=value without quotes when the value is a token.
    """
    parts = []
    # sorted() accepts whatever items() returns; calling .sort() on the
    # result only works where items() yields a real list.
    for name, value in sorted(attrs.items()):
        if xml:
            parts.append(' %s="%s"' % (name, escape(value)))
        # this is a little bogus, but should do for now
        elif name == value and isnmtoken(value):
            parts.append(" %s" % value)
        elif istoken(value):
            parts.append(" %s=%s" % (name, value))
        else:
            parts.append(' %s="%s"' % (name, escape(value)))
    return "".join(parts)
39
40
# A letter followed by any number of letters, digits, '-', '.' or '_',
# compared case-insensitively.  The pattern is anchored with \Z rather
# than '$' because '$' also matches just before a trailing newline.
_nmtoken_rx = re.compile(r"[a-z][-._a-z0-9]*\Z", re.IGNORECASE)
def isnmtoken(s):
    """Return true if the whole of s matches the name-token pattern.

    The empty string, strings starting with a non-letter, and strings
    containing any other character (including a trailing newline) are
    rejected.
    """
    return _nmtoken_rx.match(s) is not None
44
# A letter or digit followed by any number of letters, digits, '-', '.'
# or '_', compared case-insensitively.  The pattern is anchored with \Z
# rather than '$' because '$' also matches just before a trailing newline.
_token_rx = re.compile(r"[a-z0-9][-._a-z0-9]*\Z", re.IGNORECASE)
def istoken(s):
    """Return true if the whole of s matches the token pattern.

    Unlike isnmtoken(), a leading digit is accepted.  The empty string
    and strings containing any other character (including a trailing
    newline) are rejected.
    """
    return _token_rx.match(s) is not None
48
49
def do_convert(ifp, ofp, xml=0):
    """Translate ESIS events read from ifp into markup written to ofp.

    ifp is read one line at a time with readline(); the first character
    of each line selects the event and the rest (minus the newline) is
    its data:

        -   character data; decoded, escaped and written out
        (   element start; the name COMMENT opens a comment instead
        )   element end; the name COMMENT closes a comment instead
        A   attribute for the next element start, as "name type value"
        e   marks the next element start as an empty element

    Lines of any other type are ignored.

    When xml is true, empty elements are written as <name/> and get no
    end tag.  Otherwise (SGML) the end tag is left out for elements that
    have been seen marked as empty, and the short form </> is used when
    the element being closed is the one most recently opened and no data
    containing a newline has been written since.

    If the module-level LIST_EMPTIES flag is true, the names of the empty
    elements seen are appended to EMPTIES_FILENAME, one per line.

    Raises ValueError for an attribute line that is neither of the form
    "name type value" nor "name IMPLIED".
    """
    attrs = {}
    lastopened = None
    knownempties = []
    knownempty = 0
    lastempty = 0
    while 1:
        line = ifp.readline()
        if not line:
            break

        # 'kind' rather than 'type' so the builtin is not shadowed.
        kind = line[0]
        data = line[1:]
        if data[-1:] == "\n":
            data = data[:-1]
        if kind == "-":
            data = esistools.decode(data)
            ofp.write(escape(data))
            if "\n" in data:
                # The short </> end tag is only used right after the
                # start tag, so forget the open element once a line
                # break has been written.
                lastopened = None
            knownempty = 0
            lastempty = 0
        elif kind == "(":
            if data == "COMMENT":
                ofp.write("<!--")
                continue
            tag_attrs = format_attrs(attrs, xml)
            if knownempty and xml:
                ofp.write("<%s%s/>" % (data, tag_attrs))
            else:
                ofp.write("<%s%s>" % (data, tag_attrs))
            if knownempty and data not in knownempties:
                # accumulate knowledge!
                knownempties.append(data)
            # Attributes collected so far belonged to this element only.
            attrs = {}
            lastopened = data
            lastempty = knownempty
            knownempty = 0
        elif kind == ")":
            if data == "COMMENT":
                ofp.write("-->")
                continue
            if xml:
                # <name/> was already written for an empty element.
                if not lastempty:
                    ofp.write("</%s>" % data)
            elif data not in knownempties:
                if lastopened == data:
                    ofp.write("</>")
                else:
                    ofp.write("</%s>" % data)
            lastopened = None
            lastempty = 0
        elif kind == "A":
            fields = data.split(" ", 2)
            if len(fields) == 3:
                # fields[1] is the attribute's declared type; unused.
                attrs[fields[0]] = esistools.decode(fields[2])
            elif fields[1:] != ["IMPLIED"]:
                raise ValueError("malformed attribute event: %r" % line)
            # "name IMPLIED" carries no value: the attribute was not
            # specified, so nothing is recorded for it.
        elif kind == "e":
            knownempty = 1

    if LIST_EMPTIES:
        # Append mode creates the file when it does not exist yet, so
        # new and existing files are handled the same way.
        with open(EMPTIES_FILENAME, "a") as fp:
            fp.write("".join([name + "\n" for name in knownempties]))
116
Fred Drake30a68c71998-11-23 16:59:39 +0000117
def sgml_convert(ifp, ofp):
    """Convert the ESIS events read from ifp to SGML markup on ofp.

    Thin wrapper around do_convert() with XML output switched off;
    returns whatever do_convert() returns.
    """
    use_xml = 0
    return do_convert(ifp, ofp, use_xml)
Fred Drake30a68c71998-11-23 16:59:39 +0000120
121
def xml_convert(ifp, ofp):
    """Convert the ESIS events read from ifp to XML markup on ofp.

    Thin wrapper around do_convert() with XML output switched on;
    returns whatever do_convert() returns.
    """
    use_xml = 1
    return do_convert(ifp, ofp, use_xml)
Fred Drake30a68c71998-11-23 16:59:39 +0000124
125
def main():
    """Command-line entry point.

    Usage: [-d | --declare] [-x | --xml] [infile [outfile]]

        -x, --xml       write XML instead of SGML
        -d, --declare   with --xml, write an XML declaration first

    Input defaults to standard input and output to standard output.
    Exits with status 2 after printing a usage message when the options
    are invalid or more than two file names are given.  A broken pipe
    (EPIPE) while converting is ignored silently; any other IOError
    propagates to the caller.
    """
    import getopt
    import sys

    usage = ("usage: %s [-d|--declare] [-x|--xml] [infile [outfile]]\n"
             % sys.argv[0])
    convert = sgml_convert
    xml = 0
    xmldecl = 0
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dx", ["declare", "xml"])
    except getopt.error as err:
        sys.stderr.write("%s: %s\n" % (sys.argv[0], err))
        sys.stderr.write(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-d", "--declare"):
            xmldecl = 1
        elif opt in ("-x", "--xml"):
            xml = 1
            convert = xml_convert
    if len(args) > 2:
        sys.stderr.write(usage)
        sys.exit(2)

    ifp = sys.stdin
    ofp = sys.stdout
    # Only files opened here are closed here; the standard streams are
    # left alone.
    opened = []
    try:
        if args:
            ifp = open(args[0])
            opened.append(ifp)
        if len(args) == 2:
            ofp = open(args[1], "w")
            opened.append(ofp)
        # knownempties is ignored in the XML version
        try:
            if xml and xmldecl:
                # "ISO-8859-1" is the encoding name the XML
                # specification prescribes for Latin-1.
                ofp.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            convert(ifp, ofp)
        except IOError as err:
            # The reader going away (e.g. output piped into "head") is
            # not an error worth reporting.
            if err.errno != errno.EPIPE:
                raise
    finally:
        for fp in opened:
            fp.close()
160
161
# Run the converter only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    main()