Added a few more passes through the document fragment. Not actually very interesting.

commit: 865e9ff98e2a5f77bf01aad6d4d7e7ec5ca7c9bd [log] [tgz]
author: Fred Drake <fdrake@acm.org> Thu Jul 29 22:23:19 1999 +0000
committer: Fred Drake <fdrake@acm.org> Thu Jul 29 22:23:19 1999 +0000
tree: eec9fc580bba07a61a5b1cc4dc0ae8a4bb1c19c3
parent: 96e4a06fa6de789770f154fa651adcf057c57fcf [diff] [blame]
diff --git a/Doc/tools/sgmlconv/docfixer.py b/Doc/tools/sgmlconv/docfixer.py
index f700134..5328a50 100755
--- a/Doc/tools/sgmlconv/docfixer.py
+++ b/Doc/tools/sgmlconv/docfixer.py

@@ -22,6 +22,8 @@
     pass
 
 
+PARA_ELEMENT = "para"
+
 DEBUG_PARA_FIXER = 0
 
 if DEBUG_PARA_FIXER:
@@ -77,7 +79,24 @@
                 nodes.append(child)
             for node in child.getElementsByTagName(gi):
                 nodes.append(node)
-    return nodes        
+    return nodes
+
+def find_all_elements_from_set(doc, gi_set, nodes=None):
+    # Return a list holding `doc` itself (when it is an element whose
+    # tag name is in `gi_set`) followed by every matching descendant
+    # element, parents before their children.  `nodes`, when given, is
+    # extended in place and returned; that is how the recursive calls
+    # share a single result list.
+    if nodes is None:
+        nodes = []
+    if doc.nodeType == ELEMENT:
+        if doc.tagName in gi_set:
+            nodes.append(doc)
+    for kid in doc.childNodes:
+        if kid.nodeType != ELEMENT:
+            continue
+        find_all_elements_from_set(kid, gi_set, nodes)
+    return nodes
 
 
 def simplify(doc, fragment):
@@ -108,7 +120,7 @@
             docelem.insertBefore(text, docelem.firstChild)
             docelem.insertBefore(node, text)
         docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
-    while fragment.firstChild.nodeType == TEXT:
+    while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
         fragment.removeChild(fragment.firstChild)
 
 
@@ -291,8 +303,8 @@
         docelem.appendChild(doc.createTextNode("\n"))
 
 
-def handle_labels(doc):
-    for label in find_all_elements(doc, "label"):
+def handle_labels(doc, fragment):
+    for label in find_all_elements(fragment, "label"):
         id = label.getAttribute("id")
         if not id:
             continue
@@ -303,6 +315,11 @@
             parent.setAttribute("id", id)
         # now, remove <label id="..."/> from parent:
         parent.removeChild(label)
+        if parent.tagName == "title":
+            parent.normalize()
+            children = parent.childNodes
+            if children[-1].nodeType == TEXT:
+                children[-1].data = string.rstrip(children[-1].data)
 
 
 def fixup_trailing_whitespace(doc, wsmap):
@@ -587,25 +604,27 @@
 RECURSE_INTO_PARA_CONTAINERS = (
     "chapter", "abstract", "enumerate",
     "section", "subsection", "subsubsection",
-    "paragraph", "subparagraph",
+    "paragraph", "subparagraph", "back-matter",
     "howto", "manual",
     )
 
 PARA_LEVEL_ELEMENTS = (
     "moduleinfo", "title", "verbatim", "enumerate", "item",
-    "interpreter-session",
+    "interpreter-session", "back-matter", "interactive-session",
     "opcodedesc", "classdesc", "datadesc",
-    "funcdesc", "methoddesc", "excdesc",
+    "funcdesc", "methoddesc", "excdesc", "memberdesc", "memberdescni",
     "funcdescni", "methoddescni", "excdescni",
     "tableii", "tableiii", "tableiv", "localmoduletable",
     "sectionauthor", "seealso",
     # include <para>, so we can just do it again to get subsequent paras:
-    "para",
+    PARA_ELEMENT,
     )
 
 PARA_LEVEL_PRECEEDERS = (
     "index", "indexii", "indexiii", "indexiv", "setindexsubitem",
     "stindex", "obindex", "COMMENT", "label", "input", "title",
+    "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
+    "moduleauthor",
     )
 
 
@@ -680,7 +699,7 @@
         if string.rstrip(data) != data:
             have_last = 0
             child.splitText(len(string.rstrip(data)))
-    para = doc.createElement("para")
+    para = doc.createElement(PARA_ELEMENT)
     prev = None
     indexes = range(start, after)
     indexes.reverse()
@@ -789,6 +808,130 @@
             verbatim._node.name = "interactive-session"
 
 
+def add_node_ids(fragment, counter=0):
+    # Stamp `fragment` and every node below it with a distinct integer,
+    # stored as the node_id attribute of the node's _node object.
+    # Numbering starts at `counter`; the return value is one more than
+    # the largest number handed out, so recursive calls can carry on.
+    fragment._node.node_id = counter
+    for node in fragment.childNodes:
+        counter = counter + 1
+        if node.nodeType == ELEMENT:
+            counter = add_node_ids(node, counter)
+        else:
+            node._node.node_id = counter
+    return counter + 1
+
+
+REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
+                        'refexmodindex', 'refstmodindex')
+
+def fixup_refmodindexes(fragment):
+    # Locate <ref*modindex>...</> co-located with <module>...</>, and
+    # remove the <ref*modindex>, replacing it with index=index on the
+    # <module> element.
+    #
+    # The parents of the index entries are gathered in a dictionary
+    # keyed on node_id so each parent is processed once; the ids are
+    # the ones assigned by add_node_ids().
+    nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
+    d = {}
+    for node in nodes:
+        parent = node.parentNode
+        d[parent._node.node_id] = parent
+    del nodes
+    for container in d.values():
+        fixup_refmodindexes_chunk(container)
+
+
+def fixup_refmodindexes_chunk(container):
+    # Within `container`, give index="index" to each <module> whose
+    # text equals the name attribute of a <ref*modindex> entry, and
+    # remove the entries that were matched.
+    #
+    # container is probably a <para>; let's see how often it isn't:
+    if container.tagName != PARA_ELEMENT:
+        sys.stderr.write("--- fixup_refmodindexes_chunk(%s)\n" % container)
+    module_entries = find_all_elements(container, "module")
+    if not module_entries:
+        return
+    index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
+    removes = []
+    for entry in index_entries:
+        children = entry.childNodes
+        if len(children) != 0:
+            sys.stderr.write(
+                "--- unexpected number of children for %s node:\n"
+                % entry.tagName)
+            sys.stderr.write(entry.toxml() + "\n")
+            continue
+        found = 0
+        module_name = entry.getAttribute("name")
+        for node in module_entries:
+            if len(node.childNodes) != 1:
+                continue
+            child = node.childNodes[0]
+            if child.nodeType != TEXT:
+                # only plain-text <module> content can be compared
+                continue
+            if child.data == module_name:
+                found = 1
+                node.setAttribute("index", "index")
+        if found:
+            removes.append(entry)
+    for node in removes:
+        # The searches above descend into nested elements, so a matched
+        # entry need not be a direct child of `container`.
+        node.parentNode.removeChild(node)
+
+
+def fixup_bifuncindexes(fragment):
+    # Same approach as fixup_refmodindexes(): gather the distinct
+    # parents of the <bifuncindex> entries (keyed on node_id, so
+    # add_node_ids() must have run) and process each parent once.
+    nodes = find_all_elements(fragment, 'bifuncindex')
+    d = {}
+    for node in nodes:
+        parent = node.parentNode
+        d[parent._node.node_id] = parent
+    del nodes
+    for container in d.values():
+        fixup_bifuncindexes_chunk(container)
+
+
+def fixup_bifuncindexes_chunk(container):
+    # Within `container`, look for <function> elements whose text is
+    # the name attribute of a <bifuncindex> entry followed by "()".
+    # Each such <function> gets index="index" and
+    # module="__builtin__", and the matched <bifuncindex> entries are
+    # removed.
+    removes = []
+    entries = find_all_elements(container, "bifuncindex")
+    function_entries = find_all_elements(container, "function")
+    for entry in entries:
+        function_name = entry.getAttribute("name")
+        found = 0
+        for func_entry in function_entries:
+            children = func_entry.childNodes
+            if len(children) == 0 or children[0].nodeType != TEXT:
+                # an empty <function>, or one not starting with text
+                continue
+            t2 = children[0].data
+            if t2[-2:] != "()":
+                continue
+            t2 = t2[:-2]
+            if t2 == function_name:
+                func_entry.setAttribute("index", "index")
+                func_entry.setAttribute("module", "__builtin__")
+                if not found:
+                    removes.append(entry)
+                    found = 1
+    for entry in removes:
+        # find_all_elements() also returns nested matches, so remove
+        # each entry from its own parent rather than from `container`.
+        entry.parentNode.removeChild(entry)
+
+
 _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
 
 def write_esis(doc, ofp, knownempty):
@@ -798,7 +909,8 @@
             gi = node.tagName
             if knownempty(gi):
                 if node.hasChildNodes():
-                    raise ValueError, "declared-empty node has children"
+                    raise ValueError, \
+                          "declared-empty node <%s> has children" % gi
                 ofp.write("e\n")
             for k, v in node.attributes.items():
                 value = v.value
@@ -823,7 +935,7 @@
     fragment = p.fragment
     normalize(fragment)
     simplify(doc, fragment)
-    handle_labels(fragment)
+    handle_labels(doc, fragment)
     handle_appendix(doc, fragment)
     fixup_trailing_whitespace(doc, {
         "abstract": "\n",
@@ -855,6 +967,9 @@
     fixup_table_structures(doc, fragment)
     fixup_rfc_references(doc, fragment)
     fixup_signatures(doc, fragment)
+    add_node_ids(fragment)
+    fixup_refmodindexes(fragment)
+    fixup_bifuncindexes(fragment)
     #
     d = {}
     for gi in p.get_empties():
commit	865e9ff98e2a5f77bf01aad6d4d7e7ec5ca7c9bd	[log] [tgz]
author	Fred Drake <fdrake@acm.org>	Thu Jul 29 22:23:19 1999 +0000
committer	Fred Drake <fdrake@acm.org>	Thu Jul 29 22:23:19 1999 +0000
tree	eec9fc580bba07a61a5b1cc4dc0ae8a4bb1c19c3
parent	96e4a06fa6de789770f154fa651adcf057c57fcf [diff] [blame]