| #! /usr/bin/env python |
| |
| """Convert ESIS events to SGML or XML markup. |
| |
| This is limited, but seems sufficient for the ESIS generated by the |
| latex2esis.py script when run over the Python documentation. |
| """ |
| |
| # This should have an explicit option to indicate whether the *INPUT* was |
| # generated from an SGML or an XML application. |
| |
| __version__ = '$Revision$' |
| |
| import errno |
| import esistools |
| import os |
| import re |
| import string |
| |
| from xml.utils import escape |
| |
| |
| AUTOCLOSE = () |
| |
| EMPTIES_FILENAME = "../sgml/empties.dat" |
| LIST_EMPTIES = 0 |
| |
| |
| _elem_map = {} |
| _attr_map = {} |
| _token_map = {} |
| |
| _normalize_case = str |
| |
| def map_gi(sgmlgi, map): |
| uncased = _normalize_case(sgmlgi) |
| try: |
| return map[uncased] |
| except IndexError: |
| map[uncased] = sgmlgi |
| return sgmlgi |
| |
| def null_map_gi(sgmlgi, map): |
| return sgmlgi |
| |
| |
| def format_attrs(attrs, xml=0): |
| attrs = attrs.items() |
| attrs.sort() |
| parts = [] |
| append = parts.append |
| for name, value in attrs: |
| if xml: |
| append('%s="%s"' % (name, escape(value))) |
| else: |
| # this is a little bogus, but should do for now |
| if name == value and isnmtoken(value): |
| append(value) |
| elif istoken(value): |
| if value == "no" + name: |
| append(value) |
| else: |
| append("%s=%s" % (name, value)) |
| else: |
| append('%s="%s"' % (name, escape(value))) |
| if parts: |
| parts.insert(0, '') |
| return string.join(parts) |
| |
| |
| _nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE) |
| def isnmtoken(s): |
| return _nmtoken_rx.match(s) is not None |
| |
| _token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE) |
| def istoken(s): |
| return _token_rx.match(s) is not None |
| |
| |
| def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()): |
| if xml: |
| autoclose = () |
| attrs = {} |
| lastopened = None |
| knownempties = [] |
| knownempty = 0 |
| lastempty = 0 |
| inverbatim = 0 |
| while 1: |
| line = ifp.readline() |
| if not line: |
| break |
| |
| type = line[0] |
| data = line[1:] |
| if data and data[-1] == "\n": |
| data = data[:-1] |
| if type == "-": |
| data = esistools.decode(data) |
| data = escape(data) |
| if not inverbatim: |
| data = string.replace(data, "---", "—") |
| ofp.write(data) |
| if "\n" in data: |
| lastopened = None |
| knownempty = 0 |
| lastempty = 0 |
| elif type == "(": |
| if data == "COMMENT": |
| ofp.write("<!--") |
| continue |
| data = map_gi(data, _elem_map) |
| if knownempty and xml: |
| ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml))) |
| else: |
| ofp.write("<%s%s>" % (data, format_attrs(attrs, xml))) |
| if knownempty and data not in knownempties: |
| # accumulate knowledge! |
| knownempties.append(data) |
| attrs = {} |
| lastopened = data |
| lastempty = knownempty |
| knownempty = 0 |
| inverbatim = data in verbatims |
| elif type == ")": |
| if data == "COMMENT": |
| ofp.write("-->") |
| continue |
| data = map_gi(data, _elem_map) |
| if xml: |
| if not lastempty: |
| ofp.write("</%s>" % data) |
| elif data not in knownempties: |
| if data in autoclose: |
| pass |
| elif lastopened == data: |
| ofp.write("</>") |
| else: |
| ofp.write("</%s>" % data) |
| lastopened = None |
| lastempty = 0 |
| inverbatim = 0 |
| elif type == "A": |
| name, type, value = string.split(data, " ", 2) |
| name = map_gi(name, _attr_map) |
| attrs[name] = esistools.decode(value) |
| elif type == "e": |
| knownempty = 1 |
| |
| if LIST_EMPTIES: |
| dump_empty_element_names(knownempties) |
| |
| |
| def dump_empty_element_names(knownempties): |
| d = {} |
| for gi in knownempties: |
| d[gi] = gi |
| knownempties.append("") |
| if os.path.isfile(EMPTIES_FILENAME): |
| fp = open(EMPTIES_FILENAME) |
| while 1: |
| line = fp.readline() |
| if not line: |
| break |
| gi = string.strip(line) |
| if gi: |
| d[gi] = gi |
| fp = open(EMPTIES_FILENAME, "w") |
| gilist = d.keys() |
| gilist.sort() |
| fp.write(string.join(gilist, "\n")) |
| fp.write("\n") |
| fp.close() |
| |
| |
| def update_gi_map(map, names, fromsgml=1): |
| for name in string.split(names, ","): |
| if fromsgml: |
| uncased = string.lower(name) |
| else: |
| uncased = name |
| map[uncased] = name |
| |
| |
| def main(): |
| import getopt |
| import sys |
| # |
| autoclose = AUTOCLOSE |
| xml = 1 |
| xmldecl = 0 |
| elem_names = '' |
| attr_names = '' |
| value_names = '' |
| verbatims = ('verbatim', 'interactive-session') |
| opts, args = getopt.getopt(sys.argv[1:], "adesx", |
| ["autoclose=", "declare", "sgml", "xml", |
| "elements-map=", "attributes-map", |
| "values-map="]) |
| for opt, arg in opts: |
| if opt in ("-d", "--declare"): |
| xmldecl = 1 |
| elif opt == "-e": |
| global LIST_EMPTIES |
| LIST_EMPTIES = 1 |
| elif opt in ("-s", "--sgml"): |
| xml = 0 |
| elif opt in ("-x", "--xml"): |
| xml = 1 |
| elif opt in ("-a", "--autoclose"): |
| autoclose = string.split(arg, ",") |
| elif opt == "--elements-map": |
| elem_names = ("%s,%s" % (elem_names, arg))[1:] |
| elif opt == "--attributes-map": |
| attr_names = ("%s,%s" % (attr_names, arg))[1:] |
| elif opt == "--values-map": |
| value_names = ("%s,%s" % (value_names, arg))[1:] |
| # |
| # open input streams: |
| # |
| if len(args) == 0: |
| ifp = sys.stdin |
| ofp = sys.stdout |
| elif len(args) == 1: |
| ifp = open(args[0]) |
| ofp = sys.stdout |
| elif len(args) == 2: |
| ifp = open(args[0]) |
| ofp = open(args[1], "w") |
| else: |
| usage() |
| sys.exit(2) |
| # |
| # setup the name maps: |
| # |
| if elem_names or attr_names or value_names: |
| # assume the origin was SGML; ignore case of the names from the ESIS |
| # stream but set up conversion tables to get the case right on output |
| global _normalize_case |
| _normalize_case = string.lower |
| update_gi_map(_elem_map, string.split(elem_names, ",")) |
| update_gi_map(_attr_map, string.split(attr_names, ",")) |
| update_gi_map(_values_map, string.split(value_names, ",")) |
| else: |
| global map_gi |
| map_gi = null_map_gi |
| # |
| # run the conversion: |
| # |
| try: |
| if xml and xmldecl: |
| opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n') |
| convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims) |
| except IOError, (err, msg): |
| if err != errno.EPIPE: |
| raise |
| |
| |
| if __name__ == "__main__": |
| main() |