Blame - Doc/tools/sgmlconv/docfixer.py - platform/external/python/cpython3

blob: 11c487d8d4a34f882abf9fd60ad1e89f6d26fc2e [file] [log] [blame]

Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	3	"""Perform massive transformations on a document tree created from the LaTeX
				4	of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	5	"""
				6	__version__ = '$Revision$'
				7
				8
				9	import errno
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	10	import esistools
				11	import re
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	12	import string
				13	import sys
				14	import xml.dom.core
				15	import xml.dom.esis_builder
				16
				17
class ConversionError(Exception):
    """Raised when the document tree cannot be converted as expected."""
				20
				21
# Debugging switch for the paragraph fixer; selects which para_msg() is used.
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the debugging message *s* (debugging is switched off)."""
        return None
else:
    def para_msg(s):
        """Write the debugging message *s* to stderr, prefixed with '*** '."""
        sys.stderr.write("*** %s\n" % s)
				30
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	31
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	32	# Workaround to deal with invalid documents (multiple root elements). This
				33	# does not indicate a bug in the DOM implementation.
				34	#
				35	def get_documentElement(self):
				36	docelem = None
				37	for n in self._node.children:
				38	if n.type == xml.dom.core.ELEMENT:
				39	docelem = xml.dom.core.Element(n, self, self)
				40	return docelem
				41
				42	xml.dom.core.Document.get_documentElement = get_documentElement
				43
				44
				45	# Replace get_childNodes for the Document class; without this, children
				46	# accessed from the Document object via .childNodes (no matter how many
				47	# levels of access are used) will be given an ownerDocument of None.
				48	#
				49	def get_childNodes(self):
				50	return xml.dom.core.NodeList(self._node.children, self, self)
				51
				52	xml.dom.core.Document.get_childNodes = get_childNodes
				53
				54
				55	def get_first_element(doc, gi):
				56	for n in doc.childNodes:
				57	if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
				58	return n
				59
				60	def extract_first_element(doc, gi):
				61	node = get_first_element(doc, gi)
				62	if node is not None:
				63	doc.removeChild(node)
				64	return node
				65
				66
def find_all_elements(doc, gi):
    """Collect the elements named *gi* found at or below *doc* into a list.

    *doc* itself is included when it is an element named *gi*; each element
    child is then checked, followed by whatever its getElementsByTagName()
    reports for *gi*.
    """
    matches = []
    if doc.nodeType == xml.dom.core.ELEMENT and doc.tagName == gi:
        matches.append(doc)
    for kid in doc.childNodes:
        if kid.nodeType != xml.dom.core.ELEMENT:
            continue
        if kid.tagName == gi:
            matches.append(kid)
        for descendant in kid.getElementsByTagName(gi):
            matches.append(descendant)
    return matches
				78
				79
def simplify(doc):
    """Fold the stray root-level nodes of *doc* into the document element.

    The root-level <documentclass> supplies the new name of the <document>
    element; the root-level <title> and every root-level <input> are moved
    to the front of the document element, each followed by a newline.
    Leading text nodes are then removed from *doc*.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    root_name = "document"
    pending = []
    classnode = extract_first_element(doc, "documentclass")
    if classnode is not None:
        root_name = classnode.getAttribute("classname")
    titlenode = extract_first_element(doc, "title")
    if titlenode is not None:
        pending.append(titlenode)
    # update the name of the root element
    rootnode = get_first_element(doc, "document")
    if rootnode is not None:
        rootnode._node.name = root_name
    # pull every root-level <input> out of the document
    inputnode = extract_first_element(doc, "input")
    while inputnode is not None:
        pending.append(inputnode)
        inputnode = extract_first_element(doc, "input")
    if pending:
        docelem = doc.documentElement
        # insert back-to-front so the nodes end up in their original order
        pending.reverse()
        for moved in pending:
            newline = doc.createTextNode("\n")
            docelem.insertBefore(newline, docelem.firstChild)
            docelem.insertBefore(moved, newline)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    while doc.firstChild.nodeType == xml.dom.core.TEXT:
        doc.removeChild(doc.firstChild)
				110
				111
				112	def cleanup_root_text(doc):
				113	discards = []
				114	skip = 0
				115	for n in doc.childNodes:
				116	prevskip = skip
				117	skip = 0
				118	if n.nodeType == xml.dom.core.TEXT and not prevskip:
				119	discards.append(n)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	120	elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	121	skip = 1
				122	for node in discards:
				123	doc.removeChild(node)
				124
				125
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    # C API items
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    # classes and their members
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    # exceptions, functions, opcodes
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    # data items
    "datadesc",
    "datadescni",
)
				132
				133	def fixup_descriptors(doc):
Fred Drake	3a7ff99	1999-01-29 21:31:12 +0000	[diff] [blame]	134	sections = find_all_elements(doc, "section")
				135	for section in sections:
				136	find_and_fix_descriptors(doc, section)
				137
				138
				139	def find_and_fix_descriptors(doc, container):
				140	children = container.childNodes
				141	for child in children:
				142	if child.nodeType == xml.dom.core.ELEMENT:
				143	tagName = child.tagName
				144	if tagName in DESCRIPTOR_ELEMENTS:
				145	rewrite_descriptor(doc, child)
				146	elif tagName == "subsection":
				147	find_and_fix_descriptors(doc, child)
				148
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	149
				150	def rewrite_descriptor(doc, descriptor):
				151	#
				152	# Do these things:
				153	# 1. Add an "index=noindex" attribute to the element if the tagName
				154	# ends in 'ni', removing the 'ni' from the name.
				155	# 2. Create a <signature> from the name attribute and <args>.
				156	# 3. Create additional <signature>s from <*line{,ni}> elements,
				157	# if found.
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	158	# 4. If a <versionadded> is found, move it to an attribute on the
				159	# descriptor.
				160	# 5. Move remaining child nodes to a <description> element.
				161	# 6. Put it back together.
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	162	#
				163	descname = descriptor.tagName
				164	index = 1
				165	if descname[-2:] == "ni":
				166	descname = descname[:-2]
				167	descriptor.setAttribute("index", "noindex")
				168	descriptor._node.name = descname
				169	index = 0
				170	desctype = descname[:-4] # remove 'desc'
				171	linename = desctype + "line"
				172	if not index:
				173	linename = linename + "ni"
				174	# 2.
				175	signature = doc.createElement("signature")
				176	name = doc.createElement("name")
				177	signature.appendChild(doc.createTextNode("\n "))
				178	signature.appendChild(name)
				179	name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
				180	descriptor.removeAttribute("name")
				181	if descriptor.attributes.has_key("var"):
				182	variable = descriptor.getAttribute("var")
				183	if variable:
				184	args = doc.createElement("args")
				185	args.appendChild(doc.createTextNode(variable))
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	186	signature.appendChild(doc.createTextNode("\n "))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	187	signature.appendChild(args)
				188	descriptor.removeAttribute("var")
				189	newchildren = [signature]
				190	children = descriptor.childNodes
				191	pos = skip_leading_nodes(children, 0)
				192	if pos < len(children):
				193	child = children[pos]
				194	if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
				195	# create an <args> in <signature>:
				196	args = doc.createElement("args")
				197	argchildren = []
				198	map(argchildren.append, child.childNodes)
				199	for n in argchildren:
				200	child.removeChild(n)
				201	args.appendChild(n)
				202	signature.appendChild(doc.createTextNode("\n "))
				203	signature.appendChild(args)
				204	signature.appendChild(doc.createTextNode("\n "))
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	205	# 3, 4.
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	206	pos = skip_leading_nodes(children, pos + 1)
				207	while pos < len(children) \
				208	and children[pos].nodeType == xml.dom.core.ELEMENT \
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	209	and children[pos].tagName in (linename, "versionadded"):
				210	if children[pos].tagName == linename:
				211	# this is really a supplemental signature, create <signature>
				212	sig = methodline_to_signature(doc, children[pos])
				213	newchildren.append(sig)
				214	else:
				215	# <versionadded added=...>
				216	descriptor.setAttribute(
				217	"added", children[pos].getAttribute("version"))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	218	pos = skip_leading_nodes(children, pos + 1)
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	219	# 5.
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	220	description = doc.createElement("description")
				221	description.appendChild(doc.createTextNode("\n"))
				222	newchildren.append(description)
				223	move_children(descriptor, description, pos)
				224	last = description.childNodes[-1]
				225	if last.nodeType == xml.dom.core.TEXT:
				226	last.data = string.rstrip(last.data) + "\n "
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	227	# 6.
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	228	# should have nothing but whitespace and signature lines in <descriptor>;
				229	# discard them
				230	while descriptor.childNodes:
				231	descriptor.removeChild(descriptor.childNodes[0])
				232	for node in newchildren:
				233	descriptor.appendChild(doc.createTextNode("\n "))
				234	descriptor.appendChild(node)
				235	descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	236
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	237
				238	def methodline_to_signature(doc, methodline):
				239	signature = doc.createElement("signature")
				240	signature.appendChild(doc.createTextNode("\n "))
				241	name = doc.createElement("name")
				242	name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	243	methodline.removeAttribute("name")
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	244	signature.appendChild(name)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	245	if len(methodline.childNodes):
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	246	args = doc.createElement("args")
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	247	signature.appendChild(doc.createTextNode("\n "))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	248	signature.appendChild(args)
				249	move_children(methodline, args)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	250	signature.appendChild(doc.createTextNode("\n "))
				251	return signature
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	252
				253
def move_children(origin, dest, start=0):
    """Move the children of *origin* from index *start* onward to the end
    of *dest*, preserving their order.
    """
    kids = origin.childNodes
    # the list shrinks with every removal, so the same index is re-read
    while len(kids) > start:
        moving = kids[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
				260
				261
def handle_appendix(doc):
    """Move everything after the appendix marker into a <back-matter>.

    The children of the document element are scanned for the first one
    containing an <appendix> element; that <appendix> is removed, and every
    later child of the document element is moved into a new <back-matter>
    element appended to the document element.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    found_appendix = 0
    trailing = []
    for node in docelem.childNodes:
        if found_appendix:
            trailing.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                found_appendix = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if trailing:
        # explicit loops: map() is lazy on Python 3 and would move nothing
        for node in trailing:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # drop leading whitespace-only text before filling <back-matter>
        while trailing and trailing[0].nodeType == xml.dom.core.TEXT \
              and not trailing[0].data.strip():
            del trailing[0]
        for node in trailing:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	289
				290
				291	def handle_labels(doc):
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	292	for label in find_all_elements(doc, "label"):
				293	id = label.getAttribute("id")
				294	if not id:
				295	continue
				296	parent = label.parentNode
				297	if parent.tagName == "title":
				298	parent.parentNode.setAttribute("id", id)
				299	else:
				300	parent.setAttribute("id", id)
				301	# now, remove <label id="..."/> from parent:
				302	parent.removeChild(label)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	303
				304
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of the elements named in *wsmap*.

    *wsmap* maps a tag name to the whitespace string that should end the
    element's content.  The tree is walked breadth-first from *doc*; for
    each element whose tag is in *wsmap*, a final text child has its
    trailing whitespace replaced by the mapped string.
    """
    queue = [doc]
    while queue:
        node = queue.pop(0)
        if node.nodeType == xml.dom.core.ELEMENT \
           and node.tagName in wsmap:
            ws = wsmap[node.tagName]
            children = node.childNodes
            # an element may have no children at all; only touch a final
            # text node when there is one
            if children and children[-1].nodeType == xml.dom.core.TEXT:
                last = children[-1]
                last.data = last.data.rstrip() + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                # createTextNode is the DOM factory used throughout this file
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
				327
				328
				329	def normalize(doc):
				330	for node in doc.childNodes:
				331	if node.nodeType == xml.dom.core.ELEMENT:
				332	node.normalize()
				333
				334
				335	def cleanup_trailing_parens(doc, element_names):
				336	d = {}
				337	for gi in element_names:
				338	d[gi] = gi
				339	rewrite_element = d.has_key
				340	queue = []
				341	for node in doc.childNodes:
				342	if node.nodeType == xml.dom.core.ELEMENT:
				343	queue.append(node)
				344	while queue:
				345	node = queue[0]
				346	del queue[0]
				347	if rewrite_element(node.tagName):
				348	children = node.childNodes
				349	if len(children) == 1 \
				350	and children[0].nodeType == xml.dom.core.TEXT:
				351	data = children[0].data
				352	if data[-2:] == "()":
				353	children[0].data = data[:-2]
				354	else:
				355	for child in node.childNodes:
				356	if child.nodeType == xml.dom.core.ELEMENT:
				357	queue.append(child)
				358
				359
def contents_match(left, right):
    """Return 1 if the children of *left* and *right* match, else 0.

    Children are compared pairwise: elements by tag name and, recursively,
    by their own children; text nodes by their data.  Attributes are not
    compared, and any other node type counts as a mismatch.
    """
    lkids = left.childNodes
    rkids = right.childNodes
    count = len(lkids)
    if count != len(rkids):
        return 0
    for i in range(count):
        l = lkids[i]
        r = rkids[i]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            if l.tagName != r.tagName:
                return 0
            # should check attributes, but that's not a problem here
            if not contents_match(l, r):
                return 0
        elif kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
				382
				383
				384	def create_module_info(doc, section):
				385	# Heavy.
				386	node = extract_first_element(section, "modulesynopsis")
				387	if node is None:
				388	return
				389	node._node.name = "synopsis"
				390	lastchild = node.childNodes[-1]
				391	if lastchild.nodeType == xml.dom.core.TEXT \
				392	and lastchild.data[-1:] == ".":
				393	lastchild.data = lastchild.data[:-1]
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	394	modauthor = extract_first_element(section, "moduleauthor")
				395	if modauthor:
				396	modauthor._node.name = "author"
				397	modauthor.appendChild(doc.createTextNode(
				398	modauthor.getAttribute("name")))
				399	modauthor.removeAttribute("name")
Fred Drake	87a42cd	1999-03-11 17:35:12 +0000	[diff] [blame^]	400	platform = extract_first_element(section, "platform")
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	401	if section.tagName == "section":
				402	modinfo_pos = 2
				403	modinfo = doc.createElement("moduleinfo")
				404	moddecl = extract_first_element(section, "declaremodule")
				405	name = None
				406	if moddecl:
				407	modinfo.appendChild(doc.createTextNode("\n "))
				408	name = moddecl.attributes["name"].value
				409	namenode = doc.createElement("name")
				410	namenode.appendChild(doc.createTextNode(name))
				411	modinfo.appendChild(namenode)
				412	type = moddecl.attributes.get("type")
				413	if type:
				414	type = type.value
				415	modinfo.appendChild(doc.createTextNode("\n "))
				416	typenode = doc.createElement("type")
				417	typenode.appendChild(doc.createTextNode(type))
				418	modinfo.appendChild(typenode)
Fred Drake	1dd152d	1999-01-29 22:12:29 +0000	[diff] [blame]	419	versionadded = extract_first_element(section, "versionadded")
				420	if versionadded:
				421	modinfo.setAttribute("added", versionadded.getAttribute("version"))
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	422	title = get_first_element(section, "title")
				423	if title:
				424	children = title.childNodes
				425	if len(children) >= 2 \
				426	and children[0].nodeType == xml.dom.core.ELEMENT \
				427	and children[0].tagName == "module" \
				428	and children[0].childNodes[0].data == name:
				429	# this is it; morph the <title> into <short-synopsis>
				430	first_data = children[1]
				431	if first_data.data[:4] == " ---":
				432	first_data.data = string.lstrip(first_data.data[4:])
				433	title._node.name = "short-synopsis"
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	434	if children[-1].nodeType == xml.dom.core.TEXT \
				435	and children[-1].data[-1:] == ".":
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	436	children[-1].data = children[-1].data[:-1]
				437	section.removeChild(title)
				438	section.removeChild(section.childNodes[0])
				439	title.removeChild(children[0])
				440	modinfo_pos = 0
				441	else:
				442	sys.stderr.write(
				443	"module name in title doesn't match"
				444	" <declaremodule>; no <short-synopsis>\n")
				445	else:
				446	sys.stderr.write(
				447	"Unexpected condition: <section> without <title>\n")
				448	modinfo.appendChild(doc.createTextNode("\n "))
				449	modinfo.appendChild(node)
				450	if title and not contents_match(title, node):
				451	# The short synopsis is actually different,
				452	# and needs to be stored:
				453	modinfo.appendChild(doc.createTextNode("\n "))
				454	modinfo.appendChild(title)
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	455	if modauthor:
				456	modinfo.appendChild(doc.createTextNode("\n "))
				457	modinfo.appendChild(modauthor)
Fred Drake	87a42cd	1999-03-11 17:35:12 +0000	[diff] [blame^]	458	if platform:
				459	modinfo.appendChild(doc.createTextNode("\n "))
				460	modinfo.appendChild(platform)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	461	modinfo.appendChild(doc.createTextNode("\n "))
				462	section.insertBefore(modinfo, section.childNodes[modinfo_pos])
				463	section.insertBefore(doc.createTextNode("\n "), modinfo)
Fred Drake	87a42cd	1999-03-11 17:35:12 +0000	[diff] [blame^]	464	#
				465	# The rest of this removes extra newlines from where we cut out
				466	# a lot of elements. A lot of code for minimal value, but keeps
				467	# keeps the generated SGML from being too funny looking.
				468	#
				469	section.normalize()
				470	children = section.childNodes
				471	for i in range(len(children)):
				472	node = children[i]
				473	if node.nodeType == xml.dom.core.ELEMENT \
				474	and node.tagName == "moduleinfo":
				475	nextnode = children[i+1]
				476	if nextnode.nodeType == xml.dom.core.TEXT:
				477	data = nextnode.data
				478	if len(string.lstrip(data)) < (len(data) - 4):
				479	nextnode.data = "\n\n\n" + string.lstrip(data)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	480
				481
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> found in *doc*."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	485
				486
def remap_element_names(root, name_map):
    """Rename the elements below *root* according to *name_map*.

    *name_map* maps an old tag name to a (new name, attributes) pair; the
    attributes mapping is applied to the renamed element with
    setAttribute().
    """
    stack = []
    for child in root.childNodes:
        if child.nodeType == xml.dom.core.ELEMENT:
            stack.append(child)
    while stack:
        node = stack.pop()
        old_name = node.tagName
        if old_name in name_map:
            new_name, attrs = name_map[old_name]
            node._node.name = new_name
            for attr, value in attrs.items():
                node.setAttribute(attr, value)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                stack.append(child)
				503
				504
				505	def fixup_table_structures(doc):
				506	# must be done after remap_element_names(), or the tables won't be found
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	507	for table in find_all_elements(doc, "table"):
				508	fixup_table(doc, table)
				509
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	510
				511	def fixup_table(doc, table):
				512	# create the table head
				513	thead = doc.createElement("thead")
				514	row = doc.createElement("row")
				515	move_elements_by_name(doc, table, row, "entry")
				516	thead.appendChild(doc.createTextNode("\n "))
				517	thead.appendChild(row)
				518	thead.appendChild(doc.createTextNode("\n "))
				519	# create the table body
				520	tbody = doc.createElement("tbody")
				521	prev_row = None
				522	last_was_hline = 0
				523	children = table.childNodes
				524	for child in children:
				525	if child.nodeType == xml.dom.core.ELEMENT:
				526	tagName = child.tagName
				527	if tagName == "hline" and prev_row is not None:
				528	prev_row.setAttribute("rowsep", "1")
				529	elif tagName == "row":
				530	prev_row = child
				531	# save the rows:
				532	tbody.appendChild(doc.createTextNode("\n "))
				533	move_elements_by_name(doc, table, tbody, "row", sep="\n ")
				534	# and toss the rest:
				535	while children:
				536	child = children[0]
				537	nodeType = child.nodeType
				538	if nodeType == xml.dom.core.TEXT:
				539	if string.strip(child.data):
				540	raise ConversionError("unexpected free data in table")
				541	table.removeChild(child)
				542	continue
				543	if nodeType == xml.dom.core.ELEMENT:
				544	if child.tagName != "hline":
				545	raise ConversionError(
				546	"unexpected <%s> in table" % child.tagName)
				547	table.removeChild(child)
				548	continue
				549	raise ConversionError(
				550	"unexpected %s node in table" % child.__class__.__name__)
				551	# nothing left in the <table>; add the <thead> and <tbody>
				552	tgroup = doc.createElement("tgroup")
				553	tgroup.appendChild(doc.createTextNode("\n "))
				554	tgroup.appendChild(thead)
				555	tgroup.appendChild(doc.createTextNode("\n "))
				556	tgroup.appendChild(tbody)
				557	tgroup.appendChild(doc.createTextNode("\n "))
				558	table.appendChild(tgroup)
				559	# now make the <entry>s look nice:
				560	for row in table.getElementsByTagName("row"):
				561	fixup_row(doc, row)
				562
				563
				564	def fixup_row(doc, row):
				565	entries = []
				566	map(entries.append, row.childNodes[1:])
				567	for entry in entries:
				568	row.insertBefore(doc.createTextNode("\n "), entry)
				569	# row.appendChild(doc.createTextNode("\n "))
				570
				571
				572	def move_elements_by_name(doc, source, dest, name, sep=None):
				573	nodes = []
				574	for child in source.childNodes:
				575	if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
				576	nodes.append(child)
				577	for node in nodes:
				578	source.removeChild(node)
				579	dest.appendChild(node)
				580	if sep:
				581	dest.appendChild(doc.createTextNode(sep))
				582
				583
# Containers that the paragraph fixer descends into rather than wrapping.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
)

# Elements that sit at paragraph level and therefore end a paragraph.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "interpreter-session",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
)

# Elements skipped over, along with whitespace, before a paragraph starts.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
)
				607
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	608
def fixup_paras(doc):
    """Build paragraphs inside the paragraph containers at the root of
    *doc*, then inside every <description> element.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	618
				619
def fixup_paras_helper(doc, container, depth=0):
    """Wrap the paragraph material found among *container*'s children in
    <para> elements, recursing into nested paragraph containers.
    """
    # document is already normalized
    children = container.childNodes
    start = 0
    while start < len(children):
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        #
        # Either paragraph material or something to recurse into:
        #
        candidate = children[start]
        if (candidate.nodeType == xml.dom.core.ELEMENT) \
           and (candidate.tagName in RECURSE_INTO_PARA_CONTAINERS):
            fixup_paras_helper(doc, candidate)
            start = skip_leading_nodes(children, start + 1)
        else:
            #
            # paragraph material:
            #
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	643
				644
				645	def build_para(doc, parent, start, i):
				646	children = parent.childNodes
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	647	after = start + 1
				648	have_last = 0
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	649	BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	650	# Collect all children until \n\n+ is found in a text node or a
				651	# member of BREAK_ELEMENTS is found.
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	652	for j in range(start, i):
				653	after = j + 1
				654	child = children[j]
				655	nodeType = child.nodeType
				656	if nodeType == xml.dom.core.ELEMENT:
				657	if child.tagName in BREAK_ELEMENTS:
				658	after = j
				659	break
				660	elif nodeType == xml.dom.core.TEXT:
				661	pos = string.find(child.data, "\n\n")
				662	if pos == 0:
				663	after = j
				664	break
				665	if pos >= 1:
				666	child.splitText(pos)
				667	break
				668	else:
				669	have_last = 1
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	670	if (start + 1) > after:
				671	raise ConversionError(
				672	"build_para() could not identify content to turn into a paragraph")
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	673	if children[after - 1].nodeType == xml.dom.core.TEXT:
				674	# we may need to split off trailing white space:
				675	child = children[after - 1]
				676	data = child.data
				677	if string.rstrip(data) != data:
				678	have_last = 0
				679	child.splitText(len(string.rstrip(data)))
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	680	para = doc.createElement("para")
				681	prev = None
				682	indexes = range(start, after)
				683	indexes.reverse()
				684	for j in indexes:
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	685	node = parent.childNodes[j]
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	686	parent.removeChild(node)
				687	para.insertBefore(node, prev)
				688	prev = node
				689	if have_last:
				690	parent.appendChild(para)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	691	return len(parent.childNodes)
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	692	else:
				693	parent.insertBefore(para, parent.childNodes[start])
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	694	return start + 1
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	695
				696
def skip_leading_nodes(children, start):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    end = len(children)
    while start < end:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            data = child.data
            stripped = data.lstrip()
            if stripped:
                if data == stripped:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(data) - len(stripped))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            tag = child.tagName
            if tag in RECURSE_INTO_PARA_CONTAINERS:
                return start
            if tag not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return start
        start = start + 1
    return start
Fred Drake	fba0ba2	1998-12-10 05:07:09 +0000	[diff] [blame]	728
				729
def fixup_rfc_references(doc):
    """Append the text "RFC <num>" to every <rfc> element in doc.

    The number is taken from the element's "num" attribute.
    """
    for rfc in find_all_elements(doc, "rfc"):
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	734
				735
				736	def fixup_signatures(doc):
				737	for child in doc.childNodes:
				738	if child.nodeType == xml.dom.core.ELEMENT:
				739	args = child.getElementsByTagName("args")
				740	for arg in args:
				741	fixup_args(doc, arg)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	742	arg.normalize()
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	743	args = child.getElementsByTagName("constructor-args")
				744	for arg in args:
				745	fixup_args(doc, arg)
				746	arg.normalize()
				747
				748
				749	def fixup_args(doc, arglist):
				750	for child in arglist.childNodes:
				751	if child.nodeType == xml.dom.core.ELEMENT \
				752	and child.tagName == "optional":
				753	# found it; fix and return
				754	arglist.insertBefore(doc.createTextNode("["), child)
				755	optkids = child.childNodes
				756	while optkids:
				757	k = optkids[0]
				758	child.removeChild(k)
				759	arglist.insertBefore(k, child)
				760	arglist.insertBefore(doc.createTextNode("]"), child)
				761	arglist.removeChild(child)
				762	return fixup_args(doc, arglist)
				763
				764
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor> into an <author> placed near the section top.

    The element is detached from its parent, renamed to "author", given a
    text child holding the value of its "name" attribute, and stripped of
    that attribute.  It is then re-inserted, preceded by a newline text
    node, before the parent's third child -- or before the first child when
    the second child is an element other than <title>.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        owner = author.parentNode
        owner.removeChild(author)
        # Rename the element in place via the underlying node.
        author._node.name = "author"
        name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(name))
        author.removeAttribute("name")
        # Pick the insertion point from the parent's remaining children.
        kids = owner.childNodes
        anchor = kids[2]
        second = kids[1]
        if second.nodeType == xml.dom.core.ELEMENT \
           and second.tagName != "title":
            anchor = kids[0]
        owner.insertBefore(doc.createTextNode("\n "), anchor)
        owner.insertBefore(author, anchor)
				779
				780
def fixup_verbatims(doc):
    """Rename <verbatim> elements that hold interpreter sessions.

    A <verbatim> element whose first child is a text node beginning (after
    any leading whitespace) with ">>>" is renamed to <interpreter-session>.
    Elements with no children are left untouched.
    """
    for verbatim in find_all_elements(doc, "verbatim"):
        if not verbatim.hasChildNodes():
            # An empty verbatim has no first child to inspect; indexing
            # childNodes[0] would raise IndexError and abort the conversion.
            continue
        child = verbatim.childNodes[0]
        if child.nodeType == xml.dom.core.TEXT \
           and string.lstrip(child.data)[:3] == ">>>":
            # Rename the element in place via the underlying node.
            verbatim._node.name = "interpreter-session"
            #verbatim.setAttribute("interactive", "interactive")
				788
				789
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	790	_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	791
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	792	def write_esis(doc, ofp, knownempty):
				793	for node in doc.childNodes:
				794	nodeType = node.nodeType
				795	if nodeType == xml.dom.core.ELEMENT:
				796	gi = node.tagName
				797	if knownempty(gi):
				798	if node.hasChildNodes():
				799	raise ValueError, "declared-empty node has children"
				800	ofp.write("e\n")
				801	for k, v in node.attributes.items():
				802	value = v.value
				803	if _token_rx.match(value):
				804	dtype = "TOKEN"
				805	else:
				806	dtype = "CDATA"
				807	ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
				808	ofp.write("(%s\n" % gi)
				809	write_esis(node, ofp, knownempty)
				810	ofp.write(")%s\n" % gi)
				811	elif nodeType == xml.dom.core.TEXT:
				812	ofp.write("-%s\n" % esistools.encode(node.data))
				813	else:
				814	raise RuntimeError, "unsupported node type: %s" % nodeType
				815
				816
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	817	def convert(ifp, ofp):
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	818	p = esistools.ExtendedEsisBuilder()
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	819	p.feed(ifp.read())
				820	doc = p.document
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	821	normalize(doc)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	822	simplify(doc)
				823	handle_labels(doc)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	824	handle_appendix(doc)
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	825	fixup_trailing_whitespace(doc, {
				826	"abstract": "\n",
				827	"title": "",
				828	"chapter": "\n\n",
				829	"section": "\n\n",
				830	"subsection": "\n\n",
				831	"subsubsection": "\n\n",
				832	"paragraph": "\n\n",
				833	"subparagraph": "\n\n",
				834	})
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	835	cleanup_root_text(doc)
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	836	cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
Fred Drake	fba0ba2	1998-12-10 05:07:09 +0000	[diff] [blame]	837	cleanup_synopses(doc)
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	838	fixup_descriptors(doc)
Fred Drake	93d762f	1999-02-18 16:32:21 +0000	[diff] [blame]	839	fixup_verbatims(doc)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	840	normalize(doc)
				841	fixup_paras(doc)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	842	fixup_sectionauthors(doc)
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	843	remap_element_names(doc, {
				844	"tableii": ("table", {"cols": "2"}),
				845	"tableiii": ("table", {"cols": "3"}),
				846	"tableiv": ("table", {"cols": "4"}),
				847	"lineii": ("row", {}),
				848	"lineiii": ("row", {}),
				849	"lineiv": ("row", {}),
Fred Drake	d6ced7d	1999-01-19 17:11:23 +0000	[diff] [blame]	850	"refmodule": ("module", {"link": "link"}),
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	851	})
				852	fixup_table_structures(doc)
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	853	fixup_rfc_references(doc)
				854	fixup_signatures(doc)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	855	#
				856	d = {}
				857	for gi in p.get_empties():
				858	d[gi] = gi
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	859	if d.has_key("rfc"):
				860	del d["rfc"]
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	861	knownempty = d.has_key
				862	#
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	863	try:
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	864	write_esis(doc, ofp, knownempty)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	865	except IOError, (err, msg):
				866	# Ignore EPIPE; it just means that whoever we're writing to stopped
				867	# reading. The rest of the output would be ignored. All other errors
				868	# should still be reported,
				869	if err != errno.EPIPE:
				870	raise
				871
				872
				873	def main():
				874	if len(sys.argv) == 1:
				875	ifp = sys.stdin
				876	ofp = sys.stdout
				877	elif len(sys.argv) == 2:
				878	ifp = open(sys.argv[1])
				879	ofp = sys.stdout
				880	elif len(sys.argv) == 3:
				881	ifp = open(sys.argv[1])
				882	ofp = open(sys.argv[2], "w")
				883	else:
				884	usage()
				885	sys.exit(2)
				886	convert(ifp, ofp)
				887
				888
				889	if __name__ == "__main__":
				890	main()