Blame - Doc/tools/sgmlconv/docfixer.py - platform/external/python/cpython3

blob: dff20760403563cc15dd8895d9f1529489b3da10 [file] [log] [blame]

Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	"""Promote the IDs from <label/> elements to the enclosing section / chapter /
				4	whatever, then remove the <label/> elements. This allows *ML style internal
				5	linking rather than the bogus LaTeX model.
				6
				7	Note that <label/>s in <title> elements are promoted two steps, since the
				8	<title> elements are artificially created from the section parameter, and the
				9	label really refers to the sectioning construct.
				10	"""
				11	__version__ = '$Revision$'
				12
				13
				14	import errno
				15	import string
				16	import sys
				17	import xml.dom.core
				18	import xml.dom.esis_builder
				19
				20
				21	# Workaround to deal with invalid documents (multiple root elements). This
				22	# does not indicate a bug in the DOM implementation.
				23	#
				24	def get_documentElement(self):
				25	docelem = None
				26	for n in self._node.children:
				27	if n.type == xml.dom.core.ELEMENT:
				28	docelem = xml.dom.core.Element(n, self, self)
				29	return docelem
				30
				31	xml.dom.core.Document.get_documentElement = get_documentElement
				32
				33
				34	# Replace get_childNodes for the Document class; without this, children
				35	# accessed from the Document object via .childNodes (no matter how many
				36	# levels of access are used) will be given an ownerDocument of None.
				37	#
				38	def get_childNodes(self):
				39	return xml.dom.core.NodeList(self._node.children, self, self)
				40
				41	xml.dom.core.Document.get_childNodes = get_childNodes
				42
				43
				44	def get_first_element(doc, gi):
				45	for n in doc.childNodes:
				46	if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
				47	return n
				48
				49	def extract_first_element(doc, gi):
				50	node = get_first_element(doc, gi)
				51	if node is not None:
				52	doc.removeChild(node)
				53	return node
				54
				55
				56	def simplify(doc):
				57	# Try to rationalize the document a bit, since these things are simply
				58	# not valid SGML/XML documents as they stand, and need a little work.
				59	documentclass = "document"
				60	inputs = []
				61	node = extract_first_element(doc, "documentclass")
				62	if node is not None:
				63	documentclass = node.getAttribute("classname")
				64	node = extract_first_element(doc, "title")
				65	if node is not None:
				66	inputs.append(node)
				67	# update the name of the root element
				68	node = get_first_element(doc, "document")
				69	if node is not None:
				70	node._node.name = documentclass
				71	while 1:
				72	node = extract_first_element(doc, "input")
				73	if node is None:
				74	break
				75	inputs.append(node)
				76	if inputs:
				77	docelem = doc.documentElement
				78	inputs.reverse()
				79	for node in inputs:
				80	text = doc.createTextNode("\n")
				81	docelem.insertBefore(text, docelem.firstChild)
				82	docelem.insertBefore(node, text)
				83	docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
				84	while doc.firstChild.nodeType == xml.dom.core.TEXT:
				85	doc.removeChild(doc.firstChild)
				86
				87
				88	def cleanup_root_text(doc):
				89	discards = []
				90	skip = 0
				91	for n in doc.childNodes:
				92	prevskip = skip
				93	skip = 0
				94	if n.nodeType == xml.dom.core.TEXT and not prevskip:
				95	discards.append(n)
				96	elif n.nodeType == xml.dom.core.COMMENT:
				97	skip = 1
				98	for node in discards:
				99	doc.removeChild(node)
				100
				101
				102	def rewrite_desc_entries(doc, argname_gi):
				103	argnodes = doc.getElementsByTagName(argname_gi)
				104	for node in argnodes:
				105	parent = node.parentNode
				106	nodes = []
				107	for n in parent.childNodes:
				108	if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
				109	nodes.append(n)
				110	desc = doc.createElement("description")
				111	for n in nodes:
				112	parent.removeChild(n)
				113	desc.appendChild(n)
				114	if node.childNodes:
				115	# keep the <args>...</args>, newline & indent
				116	parent.insertBefore(doc.createText("\n "), node)
				117	else:
				118	# no arguments, remove the <args/> node
				119	parent.removeChild(node)
				120	parent.appendChild(doc.createText("\n "))
				121	parent.appendChild(desc)
				122	parent.appendChild(doc.createText("\n"))
				123
				124	def handle_args(doc):
				125	rewrite_desc_entries(doc, "args")
				126	rewrite_desc_entries(doc, "constructor-args")
				127
				128
				129	def handle_comments(doc, node=None):
				130	if node is None:
				131	node = doc
				132	for n in node.childNodes:
				133	if n.nodeType == xml.dom.core.ELEMENT:
				134	if n.tagName == "COMMENT":
				135	comment = doc.createComment(n.childNodes[0].data)
				136	node.replaceChild(comment, n)
				137	else:
				138	handle_comments(doc, n)
				139
				140
				141	def handle_labels(doc):
				142	labels = doc.getElementsByTagName("label")
				143	for label in labels:
				144	id = label.getAttribute("id")
				145	if not id:
				146	continue
				147	parent = label.parentNode
				148	if parent.tagName == "title":
				149	parent.parentNode.setAttribute("id", id)
				150	else:
				151	parent.setAttribute("id", id)
				152	# now, remove <label id="..."/> from parent:
				153	parent.removeChild(label)
				154
				155
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends selected elements.

    The tree is walked breadth-first starting at *doc*.  For each element
    whose tag name is a key of *wsmap*, a trailing text child has its
    trailing whitespace stripped and the mapped string appended.  For a
    <title> whose parent starts with an element, a newline and indent are
    inserted at the start of that parent.

    Elements without children are skipped instead of raising IndexError,
    and text nodes are created with doc.createTextNode(), the same
    factory simplify() uses.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # An empty element has no trailing text to fix up.
            if children:
                # Reverse so that index 0 is the last child, then put the
                # list back in its original order.
                children.reverse()
                if children[0].nodeType == xml.dom.core.TEXT:
                    data = string.rstrip(children[0].data) + ws
                    children[0].data = data
                children.reverse()
            # hack to get the title in place:
            if node.tagName == "title":
                parent = node.parentNode
                if parent.firstChild.nodeType == xml.dom.core.ELEMENT:
                    parent.insertBefore(doc.createTextNode("\n "),
                                        parent.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
				178
				179
				180	def normalize(doc):
				181	for node in doc.childNodes:
				182	if node.nodeType == xml.dom.core.ELEMENT:
				183	node.normalize()
				184
				185
				186	def cleanup_trailing_parens(doc, element_names):
				187	d = {}
				188	for gi in element_names:
				189	d[gi] = gi
				190	rewrite_element = d.has_key
				191	queue = []
				192	for node in doc.childNodes:
				193	if node.nodeType == xml.dom.core.ELEMENT:
				194	queue.append(node)
				195	while queue:
				196	node = queue[0]
				197	del queue[0]
				198	if rewrite_element(node.tagName):
				199	children = node.childNodes
				200	if len(children) == 1 \
				201	and children[0].nodeType == xml.dom.core.TEXT:
				202	data = children[0].data
				203	if data[-2:] == "()":
				204	children[0].data = data[:-2]
				205	else:
				206	for child in node.childNodes:
				207	if child.nodeType == xml.dom.core.ELEMENT:
				208	queue.append(child)
				209
				210
def convert(ifp, ofp):
    """Read an ESIS stream from *ifp*, fix up the tree, write XML to *ofp*.

    The whole of *ifp* is fed to an EsisBuilder, the resulting document
    is passed through the fix-up functions in a fixed order, and the
    serialized result plus a final newline is written to *ofp*.

    An IOError whose errno is EPIPE while writing is ignored; any other
    IOError is re-raised.
    """
    p = xml.dom.esis_builder.EsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    handle_comments(doc)
    simplify(doc)
    handle_labels(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    try:
        ofp.write(doc.toxml())
        ofp.write("\n")
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # Read the errno attribute instead of unpacking the exception as
        # an (errno, message) pair: unpacking fails, hiding the real
        # error, when the exception does not carry exactly two arguments.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
				241
				242
				243	def main():
				244	if len(sys.argv) == 1:
				245	ifp = sys.stdin
				246	ofp = sys.stdout
				247	elif len(sys.argv) == 2:
				248	ifp = open(sys.argv[1])
				249	ofp = sys.stdout
				250	elif len(sys.argv) == 3:
				251	ifp = open(sys.argv[1])
				252	ofp = open(sys.argv[2], "w")
				253	else:
				254	usage()
				255	sys.exit(2)
				256	convert(ifp, ofp)
				257
				258
				259	if __name__ == "__main__":
				260	main()