Blame - Doc/tools/sgmlconv/docfixer.py - platform/external/python/cpython3

blob: 97bcb09e3eed43b012a2a5cea63cf28117434ff7 [file] [log] [blame]

Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	"""Promote the IDs from <label/> elements to the enclosing section / chapter /
				4	whatever, then remove the <label/> elements. This allows *ML style internal
				5	linking rather than the bogus LaTeX model.
				6
				7	Note that <label/>s in <title> elements are promoted two steps, since the
				8	<title> elements are artificially created from the section parameter, and the
				9	label really refers to the sectioning construct.
				10	"""
				11	__version__ = '$Revision$'
				12
				13
				14	import errno
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	15	import esistools
				16	import re
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	17	import string
				18	import sys
				19	import xml.dom.core
				20	import xml.dom.esis_builder
				21
				22
				23	# Workaround to deal with invalid documents (multiple root elements). This
				24	# does not indicate a bug in the DOM implementation.
				25	#
				26	def get_documentElement(self):
				27	docelem = None
				28	for n in self._node.children:
				29	if n.type == xml.dom.core.ELEMENT:
				30	docelem = xml.dom.core.Element(n, self, self)
				31	return docelem
				32
				33	xml.dom.core.Document.get_documentElement = get_documentElement
				34
				35
				36	# Replace get_childNodes for the Document class; without this, children
				37	# accessed from the Document object via .childNodes (no matter how many
				38	# levels of access are used) will be given an ownerDocument of None.
				39	#
				40	def get_childNodes(self):
				41	return xml.dom.core.NodeList(self._node.children, self, self)
				42
				43	xml.dom.core.Document.get_childNodes = get_childNodes
				44
				45
				46	def get_first_element(doc, gi):
				47	for n in doc.childNodes:
				48	if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
				49	return n
				50
				51	def extract_first_element(doc, gi):
				52	node = get_first_element(doc, gi)
				53	if node is not None:
				54	doc.removeChild(node)
				55	return node
				56
				57
				58	def simplify(doc):
				59	# Try to rationalize the document a bit, since these things are simply
				60	# not valid SGML/XML documents as they stand, and need a little work.
				61	documentclass = "document"
				62	inputs = []
				63	node = extract_first_element(doc, "documentclass")
				64	if node is not None:
				65	documentclass = node.getAttribute("classname")
				66	node = extract_first_element(doc, "title")
				67	if node is not None:
				68	inputs.append(node)
				69	# update the name of the root element
				70	node = get_first_element(doc, "document")
				71	if node is not None:
				72	node._node.name = documentclass
				73	while 1:
				74	node = extract_first_element(doc, "input")
				75	if node is None:
				76	break
				77	inputs.append(node)
				78	if inputs:
				79	docelem = doc.documentElement
				80	inputs.reverse()
				81	for node in inputs:
				82	text = doc.createTextNode("\n")
				83	docelem.insertBefore(text, docelem.firstChild)
				84	docelem.insertBefore(node, text)
				85	docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
				86	while doc.firstChild.nodeType == xml.dom.core.TEXT:
				87	doc.removeChild(doc.firstChild)
				88
				89
				90	def cleanup_root_text(doc):
				91	discards = []
				92	skip = 0
				93	for n in doc.childNodes:
				94	prevskip = skip
				95	skip = 0
				96	if n.nodeType == xml.dom.core.TEXT and not prevskip:
				97	discards.append(n)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	98	elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	99	skip = 1
				100	for node in discards:
				101	doc.removeChild(node)
				102
				103
				104	def rewrite_desc_entries(doc, argname_gi):
				105	argnodes = doc.getElementsByTagName(argname_gi)
				106	for node in argnodes:
				107	parent = node.parentNode
				108	nodes = []
				109	for n in parent.childNodes:
				110	if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
				111	nodes.append(n)
				112	desc = doc.createElement("description")
				113	for n in nodes:
				114	parent.removeChild(n)
				115	desc.appendChild(n)
				116	if node.childNodes:
				117	# keep the <args>...</args>, newline & indent
				118	parent.insertBefore(doc.createText("\n "), node)
				119	else:
				120	# no arguments, remove the <args/> node
				121	parent.removeChild(node)
				122	parent.appendChild(doc.createText("\n "))
				123	parent.appendChild(desc)
				124	parent.appendChild(doc.createText("\n"))
				125
				126	def handle_args(doc):
				127	rewrite_desc_entries(doc, "args")
				128	rewrite_desc_entries(doc, "constructor-args")
				129
				130
def handle_appendix(doc):
    """Move everything that follows the <appendix/> marker into <back-matter>.

    The children of the document element are scanned for the first
    element that contains an <appendix> descendant.  That <appendix>
    node is removed and its former parent is normalized.  Every child of
    the document element that comes after the containing element is then
    moved into a new <back-matter> element appended to the document
    element; whitespace-only text at the start of the moved nodes is
    dropped instead of being moved.  Nothing is changed beyond removing
    the marker when no children follow it.

    Must be called after simplify() if the document is multi-rooted to
    begin with.
    """
    docelem = doc.documentElement
    marker_seen = 0
    trailing = []
    for node in docelem.childNodes:
        if marker_seen:
            trailing.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            markers = node.getElementsByTagName("appendix")
            if markers:
                marker_seen = 1
                marker = markers[0]
                parent = marker.parentNode
                parent.removeChild(marker)
                parent.normalize()
    if not trailing:
        return
    # Explicit loops rather than map(): these calls are made purely for
    # their side effects.
    for node in trailing:
        docelem.removeChild(node)
    docelem.appendChild(doc.createTextNode("\n\n\n"))
    back = doc.createElement("back-matter")
    docelem.appendChild(back)
    back.appendChild(doc.createTextNode("\n"))
    # skip leading whitespace-only text; the newline above replaces it
    while trailing and trailing[0].nodeType == xml.dom.core.TEXT \
          and not string.strip(trailing[0].data):
        del trailing[0]
    for node in trailing:
        back.appendChild(node)
    docelem.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	158
				159
				160	def handle_labels(doc):
				161	labels = doc.getElementsByTagName("label")
				162	for label in labels:
				163	id = label.getAttribute("id")
				164	if not id:
				165	continue
				166	parent = label.parentNode
				167	if parent.tagName == "title":
				168	parent.parentNode.setAttribute("id", id)
				169	else:
				170	parent.setAttribute("id", id)
				171	# now, remove <label id="..."/> from parent:
				172	parent.removeChild(label)
				173
				174
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the whitespace that ends the elements named in wsmap.

    doc is walked breadth-first through its element children.  For each
    element whose tag name is a key of wsmap, a text node in last-child
    position has its trailing whitespace replaced by the mapped string.
    A <title> whose parent starts with an element additionally gets a
    newline-and-indent text node inserted at the front of that parent.

    Elements without children are skipped instead of raising IndexError,
    and text nodes are created with createTextNode(), the DOM method
    used everywhere else in this module.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Nothing to adjust in an empty element; children[0] would
            # raise IndexError there.
            if len(children):
                # Reverse so that index 0 is the last child, then restore
                # the order (presumably reverse() acts on the live child
                # list -- hence the second call).
                children.reverse()
                if children[0].nodeType == xml.dom.core.TEXT:
                    data = string.rstrip(children[0].data) + ws
                    children[0].data = data
                children.reverse()
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        # descend into element children, whether or not node was rewritten
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
				197
				198
				199	def normalize(doc):
				200	for node in doc.childNodes:
				201	if node.nodeType == xml.dom.core.ELEMENT:
				202	node.normalize()
				203
				204
				205	def cleanup_trailing_parens(doc, element_names):
				206	d = {}
				207	for gi in element_names:
				208	d[gi] = gi
				209	rewrite_element = d.has_key
				210	queue = []
				211	for node in doc.childNodes:
				212	if node.nodeType == xml.dom.core.ELEMENT:
				213	queue.append(node)
				214	while queue:
				215	node = queue[0]
				216	del queue[0]
				217	if rewrite_element(node.tagName):
				218	children = node.childNodes
				219	if len(children) == 1 \
				220	and children[0].nodeType == xml.dom.core.TEXT:
				221	data = children[0].data
				222	if data[-2:] == "()":
				223	children[0].data = data[:-2]
				224	else:
				225	for child in node.childNodes:
				226	if child.nodeType == xml.dom.core.ELEMENT:
				227	queue.append(child)
				228
				229
def cleanup_synopses(doc):
    """Rename <modulesynopsis> to <synopsis> and move it up in its section.

    Every <modulesynopsis> element in doc is renamed "synopsis".  When
    its parent is a <section>, it is also removed and re-inserted in
    front of whatever then sits at index 2 of the section's children,
    with a newline-and-indent text node placed before it.

    Actually, this should build a "moduleinfo" element from various
    parts of the meta-information in the section.  <moduleinfo> needs
    some design work before we can really do anything real.
    """
    for synopsis in doc.getElementsByTagName("modulesynopsis"):
        synopsis._node.name = "synopsis"
        section = synopsis.parentNode
        if section.tagName != "section":
            continue
        # The child list is fetched before the removal and indexed after
        # it; keep these statements in this order.
        siblings = section.childNodes
        section.removeChild(synopsis)
        section.insertBefore(synopsis, siblings[2])
        section.insertBefore(doc.createTextNode("\n "), synopsis)
				244
				245
# Attribute values matching this pattern are written as TOKEN, the rest as
# CDATA.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")


def _attribute_type(value):
    """Return "TOKEN" when value matches _token_rx, otherwise "CDATA"."""
    if _token_rx.match(value):
        return "TOKEN"
    return "CDATA"


def write_esis(doc, ofp, knownempty):
    """Write the children of doc to ofp as ESIS event lines, recursively.

    doc        -- node whose childNodes are to be written
    ofp        -- object with a write() method receiving the output
    knownempty -- callable taking a tag name; a true result marks the
                  element as declared empty

    For an element an "e" line is written first if it is declared empty,
    then one "A" line per attribute, then "(" + name, the element's own
    children, and ")" + name.  A text node produces a "-" line.
    Attribute values and text are passed through esistools.encode().

    Raises ValueError when a declared-empty element has children, and
    RuntimeError for a node that is neither an element nor text.
    """
    for child in doc.childNodes:
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(child.data))
            continue
        if kind != xml.dom.core.ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = child.tagName
        if knownempty(gi):
            if child.hasChildNodes():
                raise ValueError("declared-empty node has children")
            ofp.write("e\n")
        for name, attr in child.attributes.items():
            value = attr.value
            ofp.write("A%s %s %s\n"
                      % (name, _attribute_type(value), esistools.encode(value)))
        ofp.write("(%s\n" % gi)
        write_esis(child, ofp, knownempty)
        ofp.write(")%s\n" % gi)
				271
				272
def convert(ifp, ofp):
    """Read an ESIS stream from ifp, fix up the document, write it to ofp.

    The whole of ifp is fed to an esistools.ExtendedEsisBuilder, the
    fix-up passes below are applied to the resulting document in a fixed
    order, and the result is written with write_esis().  The element
    names reported by the builder's get_empties() are the ones treated
    as declared empty on output.

    An IOError whose errno is EPIPE during output is ignored; any other
    error propagates to the caller.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    #
    # dictionary used as a set of the declared-empty element names
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError:
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        #
        # The errno attribute is read instead of unpacking the exception
        # as a 2-tuple, so an IOError carrying some other number of
        # arguments is re-raised as itself rather than being replaced by
        # an unpacking error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
				309
				310
				311	def main():
				312	if len(sys.argv) == 1:
				313	ifp = sys.stdin
				314	ofp = sys.stdout
				315	elif len(sys.argv) == 2:
				316	ifp = open(sys.argv[1])
				317	ofp = sys.stdout
				318	elif len(sys.argv) == 3:
				319	ifp = open(sys.argv[1])
				320	ofp = open(sys.argv[2], "w")
				321	else:
				322	usage()
				323	sys.exit(2)
				324	convert(ifp, ofp)
				325
				326
				327	if __name__ == "__main__":
				328	main()