Lots of small changes to make this work with the Python DOM bindings (minidom in particular); it was using PyDOM which is now obsolete. Only write the output file on success -- this avoids updating the timestamp on the file on failure, which confuses "make".

commit: 3e8f921fb9c6b33a2222251ba2f019e32982fa0a [log] [tgz]
author: Fred Drake <fdrake@acm.org> Fri Mar 23 17:01:47 2001 +0000
committer: Fred Drake <fdrake@acm.org> Fri Mar 23 17:01:47 2001 +0000
tree: fde2646746514f52c7275ad98de1049dbe97ffd3
parent: 7519e7af4207b46909de6374adac520f9205fe4f [diff]
diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py
index 7fa856f..1e9b5c3 100755
--- a/Doc/tools/sgmlconv/docfixer.py
+++ b/Doc/tools/sgmlconv/docfixer.py

@@ -11,12 +11,12 @@
 import re
 import string
 import sys
-import xml.dom.core
+import xml.dom
+import xml.dom.minidom
 
-from xml.dom.core import \
-     ELEMENT, \
-     ENTITY_REFERENCE, \
-     TEXT
+ELEMENT = xml.dom.Node.ELEMENT_NODE
+ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
+TEXT = xml.dom.Node.TEXT_NODE
 
 
 class ConversionError(Exception):
@@ -49,32 +49,9 @@
         pass
 
 
-# Workaround to deal with invalid documents (multiple root elements).  This
-# does not indicate a bug in the DOM implementation.
-#
-def get_documentElement(doc):
-    docelem = None
-    for n in doc.childNodes:
-        if n.nodeType == ELEMENT:
-            docelem = n
-    return docelem
-
-xml.dom.core.Document.get_documentElement = get_documentElement
-
-
-# Replace get_childNodes for the Document class; without this, children
-# accessed from the Document object via .childNodes (no matter how many
-# levels of access are used) will be given an ownerDocument of None.
-#
-def get_childNodes(doc):
-    return xml.dom.core.NodeList(doc._node.children, doc._node)
-
-xml.dom.core.Document.get_childNodes = get_childNodes
-
-
 def get_first_element(doc, gi):
     for n in doc.childNodes:
-        if n.get_nodeName() == gi:
+        if n.nodeName == gi:
             return n
 
 def extract_first_element(doc, gi):
@@ -84,13 +61,25 @@
     return node
 
 
+def get_documentElement(node):
+    result = None
+    for child in node.childNodes:
+        if child.nodeType == ELEMENT:
+            result = child
+    return result
+
+
+def set_tagName(elem, gi):
+    elem.nodeName = elem.tagName = gi
+
+
 def find_all_elements(doc, gi):
     nodes = []
-    if doc.get_nodeName() == gi:
+    if doc.nodeName == gi:
         nodes.append(doc)
     for child in doc.childNodes:
         if child.nodeType == ELEMENT:
-            if child.get_tagName() == gi:
+            if child.tagName == gi:
                 nodes.append(child)
             for node in child.getElementsByTagName(gi):
                 nodes.append(node)
@@ -99,18 +88,19 @@
 def find_all_child_elements(doc, gi):
     nodes = []
     for child in doc.childNodes:
-        if child.get_nodeName() == gi:
+        if child.nodeName == gi:
             nodes.append(child)
     return nodes
 
+
 def find_all_elements_from_set(doc, gi_set):
     return __find_all_elements_from_set(doc, gi_set, [])
 
 def __find_all_elements_from_set(doc, gi_set, nodes):
-    if doc.get_nodeName() in gi_set:
+    if doc.nodeName in gi_set:
         nodes.append(doc)
     for child in doc.childNodes:
-        if child.get_nodeType() == ELEMENT:
+        if child.nodeType == ELEMENT:
             __find_all_elements_from_set(child, gi_set, nodes)
     return nodes
 
@@ -129,7 +119,7 @@
     # update the name of the root element
     node = get_first_element(fragment, "document")
     if node is not None:
-        node._node.name = documentclass
+        set_tagName(node, documentclass)
     while 1:
         node = extract_first_element(fragment, "input")
         if node is None:
@@ -143,7 +133,7 @@
             docelem.insertBefore(text, docelem.firstChild)
             docelem.insertBefore(node, text)
         docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
-    while fragment.firstChild and fragment.firstChild.get_nodeType() == TEXT:
+    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
         fragment.removeChild(fragment.firstChild)
 
 
@@ -153,9 +143,9 @@
     for n in doc.childNodes:
         prevskip = skip
         skip = 0
-        if n.get_nodeType() == TEXT and not prevskip:
+        if n.nodeType == TEXT and not prevskip:
             discards.append(n)
-        elif n.get_nodeName() == "COMMENT":
+        elif n.nodeName == "COMMENT":
             skip = 1
     for node in discards:
         doc.removeChild(node)
@@ -177,8 +167,8 @@
 def find_and_fix_descriptors(doc, container):
     children = container.childNodes
     for child in children:
-        if child.get_nodeType() == ELEMENT:
-            tagName = child.get_tagName()
+        if child.nodeType == ELEMENT:
+            tagName = child.tagName
             if tagName in DESCRIPTOR_ELEMENTS:
                 rewrite_descriptor(doc, child)
             elif tagName == "subsection":
@@ -200,12 +190,12 @@
     #   6. Put it back together.
     #
     # 1.
-    descname = descriptor.get_tagName()
+    descname = descriptor.tagName
     index = 1
     if descname[-2:] == "ni":
         descname = descname[:-2]
         descriptor.setAttribute("index", "no")
-        descriptor._node.name = descname
+        set_tagName(descriptor, descname)
         index = 0
     desctype = descname[:-4] # remove 'desc'
     linename = desctype + "line"
@@ -219,7 +209,7 @@
     name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
     descriptor.removeAttribute("name")
     # 2a.
-    if descriptor.attributes.has_key("var"):
+    if descriptor.hasAttribute("var"):
         if descname != "opcodedesc":
             raise RuntimeError, \
                   "got 'var' attribute on descriptor other than opcodedesc"
@@ -245,10 +235,15 @@
     # 3, 4.
     pos = skip_leading_nodes(children, pos)
     while pos < len(children) \
-          and children[pos].get_nodeName() in (linename, "versionadded"):
-        if children[pos].get_tagName() == linename:
+          and children[pos].nodeName in (linename, "versionadded"):
+        if children[pos].tagName == linename:
             # this is really a supplemental signature, create <signature>
-            sig = methodline_to_signature(doc, children[pos])
+            oldchild = children[pos].cloneNode(1)
+            try:
+                sig = methodline_to_signature(doc, children[pos])
+            except KeyError:
+                print oldchild.toxml()
+                raise
             newchildren.append(sig)
         else:
             # <versionadded added=...>
@@ -301,7 +296,7 @@
 def handle_appendix(doc, fragment):
     # must be called after simplfy() if document is multi-rooted to begin with
     docelem = get_documentElement(fragment)
-    toplevel = docelem.get_tagName() == "manual" and "chapter" or "section"
+    toplevel = docelem.tagName == "manual" and "chapter" or "section"
     appendices = 0
     nodes = []
     for node in docelem.childNodes:
@@ -333,7 +328,7 @@
         if not id:
             continue
         parent = label.parentNode
-        parentTagName = parent.get_tagName()
+        parentTagName = parent.tagName
         if parentTagName == "title":
             parent.parentNode.setAttribute("id", id)
         else:
@@ -352,8 +347,8 @@
     while queue:
         node = queue[0]
         del queue[0]
-        if wsmap.has_key(node.get_nodeName()):
-            ws = wsmap[node.get_tagName()]
+        if wsmap.has_key(node.nodeName):
+            ws = wsmap[node.tagName]
             children = node.childNodes
             children.reverse()
             if children[0].nodeType == TEXT:
@@ -361,8 +356,8 @@
                 children[0].data = data
             children.reverse()
             # hack to get the title in place:
-            if node.get_tagName() == "title" \
-               and node.parentNode.firstChild.get_nodeType() == ELEMENT:
+            if node.tagName == "title" \
+               and node.parentNode.firstChild.nodeType == ELEMENT:
                 node.parentNode.insertBefore(doc.createText("\n  "),
                                              node.parentNode.firstChild)
         for child in node.childNodes:
@@ -388,7 +383,7 @@
     while queue:
         node = queue[0]
         del queue[0]
-        if rewrite_element(node.get_tagName()):
+        if rewrite_element(node.tagName):
             children = node.childNodes
             if len(children) == 1 \
                and children[0].nodeType == TEXT:
@@ -411,7 +406,7 @@
         if nodeType != r.nodeType:
             return 0
         if nodeType == ELEMENT:
-            if l.get_tagName() != r.get_tagName():
+            if l.tagName != r.tagName:
                 return 0
             # should check attributes, but that's not a problem here
             if not contents_match(l, r):
@@ -430,19 +425,19 @@
     node = extract_first_element(section, "modulesynopsis")
     if node is None:
         return
-    node._node.name = "synopsis"
+    set_tagName(node, "synopsis")
     lastchild = node.childNodes[-1]
     if lastchild.nodeType == TEXT \
        and lastchild.data[-1:] == ".":
         lastchild.data = lastchild.data[:-1]
     modauthor = extract_first_element(section, "moduleauthor")
     if modauthor:
-        modauthor._node.name = "author"
+        set_tagName(modauthor, "author")
         modauthor.appendChild(doc.createTextNode(
             modauthor.getAttribute("name")))
         modauthor.removeAttribute("name")
     platform = extract_first_element(section, "platform")
-    if section.get_tagName() == "section":
+    if section.tagName == "section":
         modinfo_pos = 2
         modinfo = doc.createElement("moduleinfo")
         moddecl = extract_first_element(section, "declaremodule")
@@ -467,13 +462,13 @@
         if title:
             children = title.childNodes
             if len(children) >= 2 \
-               and children[0].get_nodeName() == "module" \
+               and children[0].nodeName == "module" \
                and children[0].childNodes[0].data == name:
                 # this is it; morph the <title> into <short-synopsis>
                 first_data = children[1]
                 if first_data.data[:4] == " ---":
                     first_data.data = string.lstrip(first_data.data[4:])
-                title._node.name = "short-synopsis"
+                set_tagName(title, "short-synopsis")
                 if children[-1].nodeType == TEXT \
                    and children[-1].data[-1:] == ".":
                     children[-1].data = children[-1].data[:-1]
@@ -511,7 +506,7 @@
         children = section.childNodes
         for i in range(len(children)):
             node = children[i]
-            if node.get_nodeName() == "moduleinfo":
+            if node.nodeName == "moduleinfo":
                 nextnode = children[i+1]
                 if nextnode.nodeType == TEXT:
                     data = nextnode.data
@@ -544,7 +539,7 @@
     children = table.childNodes
     for child in children:
         if child.nodeType == ELEMENT:
-            tagName = child.get_tagName()
+            tagName = child.tagName
             if tagName == "hline" and prev_row is not None:
                 prev_row.setAttribute("rowsep", "1")
             elif tagName == "row":
@@ -558,13 +553,14 @@
         nodeType = child.nodeType
         if nodeType == TEXT:
             if string.strip(child.data):
-                raise ConversionError("unexpected free data in table")
+                raise ConversionError("unexpected free data in <%s>: %r"
+                                      % (table.tagName, child.data))
             table.removeChild(child)
             continue
         if nodeType == ELEMENT:
-            if child.get_tagName() != "hline":
+            if child.tagName != "hline":
                 raise ConversionError(
-                    "unexpected <%s> in table" % child.get_tagName())
+                    "unexpected <%s> in table" % child.tagName)
             table.removeChild(child)
             continue
         raise ConversionError(
@@ -593,7 +589,7 @@
 def move_elements_by_name(doc, source, dest, name, sep=None):
     nodes = []
     for child in source.childNodes:
-        if child.get_nodeName() == name:
+        if child.nodeName == name:
             nodes.append(child)
     for node in nodes:
         source.removeChild(node)
@@ -633,7 +629,7 @@
 
 def fixup_paras(doc, fragment):
     for child in fragment.childNodes:
-        if child.get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
+        if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
             fixup_paras_helper(doc, child)
     descriptions = find_all_elements(fragment, "description")
     for description in descriptions:
@@ -645,7 +641,7 @@
     children = container.childNodes
     start = skip_leading_nodes(children)
     while len(children) > start:
-        if children[start].get_nodeName() in RECURSE_INTO_PARA_CONTAINERS:
+        if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
             # Something to recurse into:
             fixup_paras_helper(doc, children[start])
         else:
@@ -668,7 +664,7 @@
         child = children[j]
         nodeType = child.nodeType
         if nodeType == ELEMENT:
-            if child.get_tagName() in BREAK_ELEMENTS:
+            if child.tagName in BREAK_ELEMENTS:
                 after = j
                 break
         elif nodeType == TEXT:
@@ -742,7 +738,7 @@
                 return start
             # all whitespace, just skip
         elif nodeType == ELEMENT:
-            tagName = child.get_tagName()
+            tagName = child.tagName
             if tagName in RECURSE_INTO_PARA_CONTAINERS:
                 return start
             if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
@@ -772,7 +768,7 @@
 
 def fixup_args(doc, arglist):
     for child in arglist.childNodes:
-        if child.get_nodeName() == "optional":
+        if child.nodeName == "optional":
             # found it; fix and return
             arglist.insertBefore(doc.createTextNode("["), child)
             optkids = child.childNodes
@@ -789,13 +785,13 @@
     for sectauth in find_all_elements(fragment, "sectionauthor"):
         section = sectauth.parentNode
         section.removeChild(sectauth)
-        sectauth._node.name = "author"
+        set_tagName(sectauth, "author")
         sectauth.appendChild(doc.createTextNode(
             sectauth.getAttribute("name")))
         sectauth.removeAttribute("name")
         after = section.childNodes[2]
         title = section.childNodes[1]
-        if title.get_nodeName() != "title":
+        if title.nodeName != "title":
             after = section.childNodes[0]
         section.insertBefore(doc.createTextNode("\n  "), after)
         section.insertBefore(sectauth, after)
@@ -806,17 +802,17 @@
         child = verbatim.childNodes[0]
         if child.nodeType == TEXT \
            and string.lstrip(child.data)[:3] == ">>>":
-            verbatim._node.name = "interactive-session"
+            set_tagName(verbatim, "interactive-session")
 
 
 def add_node_ids(fragment, counter=0):
-    fragment._node.node_id = counter
+    fragment.node_id = counter
     for node in fragment.childNodes:
         counter = counter + 1
         if node.nodeType == ELEMENT:
             counter = add_node_ids(node, counter)
         else:
-            node._node.node_id = counter
+            node.node_id = counter
     return counter + 1
 
 
@@ -831,14 +827,14 @@
     d = {}
     for node in nodes:
         parent = node.parentNode
-        d[parent._node.node_id] = parent
+        d[parent.node_id] = parent
     del nodes
     map(fixup_refmodindexes_chunk, d.values())
 
 
 def fixup_refmodindexes_chunk(container):
     # node is probably a <para>; let's see how often it isn't:
-    if container.get_tagName() != PARA_ELEMENT:
+    if container.tagName != PARA_ELEMENT:
         bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
     module_entries = find_all_elements(container, "module")
     if not module_entries:
@@ -849,7 +845,7 @@
         children = entry.childNodes
         if len(children) != 0:
             bwrite("--- unexpected number of children for %s node:\n"
-                   % entry.get_tagName())
+                   % entry.tagName)
             ewrite(entry.toxml() + "\n")
             continue
         found = 0
@@ -873,7 +869,7 @@
     # make sure that each parent is only processed once:
     for node in nodes:
         parent = node.parentNode
-        d[parent._node.node_id] = parent
+        d[parent.node_id] = parent
     del nodes
     map(fixup_bifuncindexes_chunk, d.values())
 
@@ -905,7 +901,7 @@
     while queue:
         parent = queue.pop()
         i = 0
-        children = parent.get_childNodes()
+        children = parent.childNodes
         nchildren = len(children)
         while i < (nchildren - 1):
             child = children[i]
@@ -914,7 +910,7 @@
                     ewrite("--- merging two <%s/> elements\n" % gi)
                     child = children[i]
                     nextchild = children[i+1]
-                    nextchildren = nextchild.get_childNodes()
+                    nextchildren = nextchild.childNodes
                     while len(nextchildren):
                         node = nextchildren[0]
                         nextchild.removeChild(node)
@@ -932,14 +928,13 @@
     for node in doc.childNodes:
         nodeType = node.nodeType
         if nodeType == ELEMENT:
-            gi = node.get_tagName()
+            gi = node.tagName
             if knownempty(gi):
                 if node.hasChildNodes():
                     raise ValueError, \
                           "declared-empty node <%s> has children" % gi
                 ofp.write("e\n")
-            for k, v in node.attributes.items():
-                value = v.value
+            for k, value in node.attributes.items():
                 if _token_rx.match(value):
                     dtype = "TOKEN"
                 else:
@@ -951,16 +946,17 @@
         elif nodeType == TEXT:
             ofp.write("-%s\n" % esistools.encode(node.data))
         elif nodeType == ENTITY_REFERENCE:
-            ofp.write("&%s\n" % node.get_nodeName())
+            ofp.write("&%s\n" % node.nodeName)
         else:
             raise RuntimeError, "unsupported node type: %s" % nodeType
 
 
 def convert(ifp, ofp):
-    p = esistools.ExtendedEsisBuilder()
-    p.feed(ifp.read())
-    doc = p.document
-    fragment = p.fragment
+    events = esistools.parse(ifp)
+    toktype, doc = events.getEvent()
+    fragment = doc.createDocumentFragment()
+    events.expandNode(fragment)
+
     normalize(fragment)
     simplify(doc, fragment)
     handle_labels(doc, fragment)
@@ -994,8 +990,10 @@
     join_adjacent_elements(fragment, "option")
     #
     d = {}
-    for gi in p.get_empties():
+    for gi in events.parser.get_empties():
         d[gi] = gi
+    if d.has_key("author"):
+        del d["author"]
     if d.has_key("rfc"):
         del d["rfc"]
     knownempty = d.has_key
@@ -1019,11 +1017,17 @@
         ofp = sys.stdout
     elif len(sys.argv) == 3:
         ifp = open(sys.argv[1])
-        ofp = open(sys.argv[2], "w")
+        import StringIO
+        ofp = StringIO.StringIO()
     else:
         usage()
         sys.exit(2)
     convert(ifp, ofp)
+    if len(sys.argv) == 3:
+        fp = open(sys.argv[2], "w")
+        fp.write(ofp.getvalue())
+        fp.close()
+        ofp.close()
 
 
 if __name__ == "__main__":
commit	3e8f921fb9c6b33a2222251ba2f019e32982fa0a	[log] [tgz]
author	Fred Drake <fdrake@acm.org>	Fri Mar 23 17:01:47 2001 +0000
committer	Fred Drake <fdrake@acm.org>	Fri Mar 23 17:01:47 2001 +0000
tree	fde2646746514f52c7275ad98de1049dbe97ffd3
parent	7519e7af4207b46909de6374adac520f9205fe4f [diff]