Blame - Doc/tools/sgmlconv/docfixer.py - platform/external/python/cpython3

blob: 0b73126551600d08ca8bd837529a2c519524d330 [file] [log] [blame]

Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	3	"""Perform massive transformations on a document tree created from the LaTeX
				4	of the Python documentation, and dump the ESIS data for the transformed tree.
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	5	"""
				6	__version__ = '$Revision$'
				7
				8
				9	import errno
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	10	import esistools
				11	import re
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	12	import string
				13	import sys
				14	import xml.dom.core
				15	import xml.dom.esis_builder
				16
				17
class ConversionError(Exception):
    """Raised when the document tree has content the converter cannot handle."""
				20
				21
# Set to a true value to make para_msg() report on stderr; the flag is also
# consulted by fixup_paras_helper().
DEBUG_PARA_FIXER = 0

if not DEBUG_PARA_FIXER:
    def para_msg(s):
        """Discard the message s; debugging output is switched off."""
        return None
else:
    def para_msg(s):
        """Write the message s to stderr, prefixed with '*** '."""
        sys.stderr.write("*** %s\n" % s)
				30
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	31
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return an Element wrapper for the last element child, or None.

    Every child of the underlying node is visited; each element child
    replaces the previous candidate, so the final one is returned.  None
    is returned when there is no element child.
    """
    found = None
    for raw in self._node.children:
        if raw.type != xml.dom.core.ELEMENT:
            continue
        found = xml.dom.core.Element(raw, self, self)
    return found

# Install the replacement accessor on the Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
				43
				44
				45	# Replace get_childNodes for the Document class; without this, children
				46	# accessed from the Document object via .childNodes (no matter how many
				47	# levels of access are used) will be given an ownerDocument of None.
				48	#
				49	def get_childNodes(self):
				50	return xml.dom.core.NodeList(self._node.children, self, self)
				51
				52	xml.dom.core.Document.get_childNodes = get_childNodes
				53
				54
				55	def get_first_element(doc, gi):
				56	for n in doc.childNodes:
				57	if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
				58	return n
				59
				60	def extract_first_element(doc, gi):
				61	node = get_first_element(doc, gi)
				62	if node is not None:
				63	doc.removeChild(node)
				64	return node
				65
				66
def find_all_elements(doc, gi):
    """Return a list of the elements named gi found at or below doc.

    doc itself is included when it is an element named gi.  Each element
    child is then checked, followed by whatever its getElementsByTagName()
    reports for gi.
    """
    ELEMENT = xml.dom.core.ELEMENT
    found = []
    if doc.nodeType == ELEMENT and doc.tagName == gi:
        found.append(doc)
    for child in doc.childNodes:
        if child.nodeType != ELEMENT:
            continue
        if child.tagName == gi:
            found.append(child)
        for descendant in child.getElementsByTagName(gi):
            found.append(descendant)
    return found
				78
				79
def simplify(doc):
    """Rationalize a multi-rooted document so it has one usable root.

    The <documentclass>, <title> and <input> elements are pulled out of
    the document's top level.  The <document> element is renamed to the
    class named by <documentclass> (default "document"), and the title
    and inputs are re-inserted at the start of the document element.
    Leading text nodes at the top level are then removed.
    """
    # Try to rationalize the document a bit, since these things are simply
    # not valid SGML/XML documents as they stand, and need a little work.
    documentclass = "document"
    inputs = []
    node = extract_first_element(doc, "documentclass")
    if node is not None:
        documentclass = node.getAttribute("classname")
    node = extract_first_element(doc, "title")
    if node is not None:
        inputs.append(node)
    # update the name of the root element
    node = get_first_element(doc, "document")
    if node is not None:
        node._node.name = documentclass
    while 1:
        node = extract_first_element(doc, "input")
        if node is None:
            break
        inputs.append(node)
    if inputs:
        docelem = doc.documentElement
        # Insert in reverse so the nodes end up in their original order,
        # each followed by a newline.
        inputs.reverse()
        for node in inputs:
            text = doc.createTextNode("\n")
            docelem.insertBefore(text, docelem.firstChild)
            docelem.insertBefore(node, text)
        docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
    # Strip leading text at the top level.  Stop cleanly if the document
    # runs out of children instead of dereferencing a missing firstChild.
    while 1:
        first = doc.firstChild
        if first is None or first.nodeType != xml.dom.core.TEXT:
            break
        doc.removeChild(first)
				110
				111
				112	def cleanup_root_text(doc):
				113	discards = []
				114	skip = 0
				115	for n in doc.childNodes:
				116	prevskip = skip
				117	skip = 0
				118	if n.nodeType == xml.dom.core.TEXT and not prevskip:
				119	discards.append(n)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	120	elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	121	skip = 1
				122	for node in discards:
				123	doc.removeChild(node)
				124
				125
# Element names that fixup_descriptors() rewrites via rewrite_descriptor().
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc",
    "cvardesc",
    "ctypedesc",
    "classdesc",
    "memberdesc",
    "memberdescni",
    "methoddesc",
    "methoddescni",
    "excdesc",
    "funcdesc",
    "funcdescni",
    "opcodedesc",
    "datadesc",
    "datadescni",
    )
				132
				133	def fixup_descriptors(doc):
				134	for tagName in DESCRIPTOR_ELEMENTS:
				135	nodes = find_all_elements(doc, tagName)
				136	for node in nodes:
				137	rewrite_descriptor(doc, node)
				138
				139	def rewrite_descriptor(doc, descriptor):
				140	#
				141	# Do these things:
				142	# 1. Add an "index=noindex" attribute to the element if the tagName
				143	# ends in 'ni', removing the 'ni' from the name.
				144	# 2. Create a <signature> from the name attribute and <args>.
				145	# 3. Create additional <signature>s from <*line{,ni}> elements,
				146	# if found.
				147	# 4. Move remaining child nodes to a <description> element.
				148	# 5. Put it back together.
				149	#
				150	descname = descriptor.tagName
				151	index = 1
				152	if descname[-2:] == "ni":
				153	descname = descname[:-2]
				154	descriptor.setAttribute("index", "noindex")
				155	descriptor._node.name = descname
				156	index = 0
				157	desctype = descname[:-4] # remove 'desc'
				158	linename = desctype + "line"
				159	if not index:
				160	linename = linename + "ni"
				161	# 2.
				162	signature = doc.createElement("signature")
				163	name = doc.createElement("name")
				164	signature.appendChild(doc.createTextNode("\n "))
				165	signature.appendChild(name)
				166	name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
				167	descriptor.removeAttribute("name")
				168	if descriptor.attributes.has_key("var"):
				169	variable = descriptor.getAttribute("var")
				170	if variable:
				171	args = doc.createElement("args")
				172	args.appendChild(doc.createTextNode(variable))
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	173	signature.appendChild(doc.createTextNode("\n "))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	174	signature.appendChild(args)
				175	descriptor.removeAttribute("var")
				176	newchildren = [signature]
				177	children = descriptor.childNodes
				178	pos = skip_leading_nodes(children, 0)
				179	if pos < len(children):
				180	child = children[pos]
				181	if child.nodeType == xml.dom.core.ELEMENT and child.tagName == "args":
				182	# create an <args> in <signature>:
				183	args = doc.createElement("args")
				184	argchildren = []
				185	map(argchildren.append, child.childNodes)
				186	for n in argchildren:
				187	child.removeChild(n)
				188	args.appendChild(n)
				189	signature.appendChild(doc.createTextNode("\n "))
				190	signature.appendChild(args)
				191	signature.appendChild(doc.createTextNode("\n "))
				192	# 3.
				193	pos = skip_leading_nodes(children, pos + 1)
				194	while pos < len(children) \
				195	and children[pos].nodeType == xml.dom.core.ELEMENT \
				196	and children[pos].tagName == linename:
				197	# this is really a supplemental signature, create <signature>
				198	sig = methodline_to_signature(doc, children[pos])
				199	newchildren.append(sig)
				200	pos = skip_leading_nodes(children, pos + 1)
				201	# 4.
				202	description = doc.createElement("description")
				203	description.appendChild(doc.createTextNode("\n"))
				204	newchildren.append(description)
				205	move_children(descriptor, description, pos)
				206	last = description.childNodes[-1]
				207	if last.nodeType == xml.dom.core.TEXT:
				208	last.data = string.rstrip(last.data) + "\n "
				209	# 5.
				210	# should have nothing but whitespace and signature lines in <descriptor>;
				211	# discard them
				212	while descriptor.childNodes:
				213	descriptor.removeChild(descriptor.childNodes[0])
				214	for node in newchildren:
				215	descriptor.appendChild(doc.createTextNode("\n "))
				216	descriptor.appendChild(node)
				217	descriptor.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	218
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	219
				220	def methodline_to_signature(doc, methodline):
				221	signature = doc.createElement("signature")
				222	signature.appendChild(doc.createTextNode("\n "))
				223	name = doc.createElement("name")
				224	name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	225	methodline.removeAttribute("name")
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	226	signature.appendChild(name)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	227	if len(methodline.childNodes):
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	228	args = doc.createElement("args")
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	229	signature.appendChild(doc.createTextNode("\n "))
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	230	signature.appendChild(args)
				231	move_children(methodline, args)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	232	signature.appendChild(doc.createTextNode("\n "))
				233	return signature
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	234
				235
def move_children(origin, dest, start=0):
    """Move the children of origin from index start onward to the end of dest.

    Relies on origin.childNodes shrinking as children are removed: the
    node at index start is moved repeatedly until none remains there.
    """
    remaining = origin.childNodes
    while len(remaining) > start:
        moving = remaining[start]
        origin.removeChild(moving)
        dest.appendChild(moving)
				242
				243
def handle_appendix(doc):
    """Move everything after the first <appendix> marker into <back-matter>.

    The children of the document element are scanned in order.  The first
    element child that contains an <appendix> has that marker removed and
    its parent normalized.  Every later child of the document element is
    then moved into a new <back-matter> element appended to the document
    element.  Leading whitespace-only text nodes are dropped rather than
    moved.
    """
    # must be called after simplify() if document is multi-rooted to begin with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: the calls are made for their side effects, so
        # they must not depend on map() being evaluated eagerly.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	271
				272
				273	def handle_labels(doc):
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	274	for label in find_all_elements(doc, "label"):
				275	id = label.getAttribute("id")
				276	if not id:
				277	continue
				278	parent = label.parentNode
				279	if parent.tagName == "title":
				280	parent.parentNode.setAttribute("id", id)
				281	else:
				282	parent.setAttribute("id", id)
				283	# now, remove <label id="..."/> from parent:
				284	parent.removeChild(label)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	285
				286
def fixup_trailing_whitespace(doc, wsmap):
    """Normalize the trailing whitespace of the elements named in wsmap.

    wsmap maps element names to the whitespace string their content should
    end with.  The tree is walked breadth-first from doc.  For each element
    whose name is in wsmap and whose last child is a text node, that text
    is right-stripped and the mapped whitespace appended.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly instead of reversing the
            # child list twice, and tolerate an element with no children.
            if len(children):
                last = children[-1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                # createTextNode() is the factory used everywhere else in
                # this file.
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
				309
				310
				311	def normalize(doc):
				312	for node in doc.childNodes:
				313	if node.nodeType == xml.dom.core.ELEMENT:
				314	node.normalize()
				315
				316
				317	def cleanup_trailing_parens(doc, element_names):
				318	d = {}
				319	for gi in element_names:
				320	d[gi] = gi
				321	rewrite_element = d.has_key
				322	queue = []
				323	for node in doc.childNodes:
				324	if node.nodeType == xml.dom.core.ELEMENT:
				325	queue.append(node)
				326	while queue:
				327	node = queue[0]
				328	del queue[0]
				329	if rewrite_element(node.tagName):
				330	children = node.childNodes
				331	if len(children) == 1 \
				332	and children[0].nodeType == xml.dom.core.TEXT:
				333	data = children[0].data
				334	if data[-2:] == "()":
				335	children[0].data = data[:-2]
				336	else:
				337	for child in node.childNodes:
				338	if child.nodeType == xml.dom.core.ELEMENT:
				339	queue.append(child)
				340
				341
def contents_match(left, right):
    """Return 1 if left and right have equivalent child content, else 0.

    Children are compared pairwise: elements must share a tag name and
    match recursively, and text nodes must have equal data.  Attributes
    are not compared.  Any other node type counts as a mismatch.
    """
    left_children = left.childNodes
    right_children = right.childNodes
    count = len(left_children)
    if count != len(right_children):
        return 0
    for i in range(count):
        l = left_children[i]
        r = right_children[i]
        nodeType = l.nodeType
        if nodeType != r.nodeType:
            return 0
        if nodeType == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif nodeType == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
				364
				365
				366	def create_module_info(doc, section):
				367	# Heavy.
				368	node = extract_first_element(section, "modulesynopsis")
				369	if node is None:
				370	return
				371	node._node.name = "synopsis"
				372	lastchild = node.childNodes[-1]
				373	if lastchild.nodeType == xml.dom.core.TEXT \
				374	and lastchild.data[-1:] == ".":
				375	lastchild.data = lastchild.data[:-1]
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	376	modauthor = extract_first_element(section, "moduleauthor")
				377	if modauthor:
				378	modauthor._node.name = "author"
				379	modauthor.appendChild(doc.createTextNode(
				380	modauthor.getAttribute("name")))
				381	modauthor.removeAttribute("name")
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	382	if section.tagName == "section":
				383	modinfo_pos = 2
				384	modinfo = doc.createElement("moduleinfo")
				385	moddecl = extract_first_element(section, "declaremodule")
				386	name = None
				387	if moddecl:
				388	modinfo.appendChild(doc.createTextNode("\n "))
				389	name = moddecl.attributes["name"].value
				390	namenode = doc.createElement("name")
				391	namenode.appendChild(doc.createTextNode(name))
				392	modinfo.appendChild(namenode)
				393	type = moddecl.attributes.get("type")
				394	if type:
				395	type = type.value
				396	modinfo.appendChild(doc.createTextNode("\n "))
				397	typenode = doc.createElement("type")
				398	typenode.appendChild(doc.createTextNode(type))
				399	modinfo.appendChild(typenode)
				400	title = get_first_element(section, "title")
				401	if title:
				402	children = title.childNodes
				403	if len(children) >= 2 \
				404	and children[0].nodeType == xml.dom.core.ELEMENT \
				405	and children[0].tagName == "module" \
				406	and children[0].childNodes[0].data == name:
				407	# this is it; morph the <title> into <short-synopsis>
				408	first_data = children[1]
				409	if first_data.data[:4] == " ---":
				410	first_data.data = string.lstrip(first_data.data[4:])
				411	title._node.name = "short-synopsis"
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	412	if children[-1].nodeType == xml.dom.core.TEXT \
				413	and children[-1].data[-1:] == ".":
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	414	children[-1].data = children[-1].data[:-1]
				415	section.removeChild(title)
				416	section.removeChild(section.childNodes[0])
				417	title.removeChild(children[0])
				418	modinfo_pos = 0
				419	else:
				420	sys.stderr.write(
				421	"module name in title doesn't match"
				422	" <declaremodule>; no <short-synopsis>\n")
				423	else:
				424	sys.stderr.write(
				425	"Unexpected condition: <section> without <title>\n")
				426	modinfo.appendChild(doc.createTextNode("\n "))
				427	modinfo.appendChild(node)
				428	if title and not contents_match(title, node):
				429	# The short synopsis is actually different,
				430	# and needs to be stored:
				431	modinfo.appendChild(doc.createTextNode("\n "))
				432	modinfo.appendChild(title)
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	433	if modauthor:
				434	modinfo.appendChild(doc.createTextNode("\n "))
				435	modinfo.appendChild(modauthor)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	436	modinfo.appendChild(doc.createTextNode("\n "))
				437	section.insertBefore(modinfo, section.childNodes[modinfo_pos])
				438	section.insertBefore(doc.createTextNode("\n "), modinfo)
				439
				440
def cleanup_synopses(doc):
    """Run create_module_info() on every <section> found in doc."""
    sections = find_all_elements(doc, "section")
    for section in sections:
        create_module_info(doc, section)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	444
				445
def remap_element_names(root, name_map):
    """Rename elements below root according to name_map.

    name_map maps an old tag name to a (new name, attributes) pair, where
    attributes is a mapping of attribute names to values that are set on
    each renamed element.  root itself is not renamed.
    """
    ELEMENT = xml.dom.core.ELEMENT
    pending = []
    for child in root.childNodes:
        if child.nodeType == ELEMENT:
            pending.append(child)
    while pending:
        node = pending.pop()
        old_name = node.tagName
        if name_map.has_key(old_name):
            new_name, attrs = name_map[old_name]
            node._node.name = new_name
            for attr, value in attrs.items():
                node.setAttribute(attr, value)
        for child in node.childNodes:
            if child.nodeType == ELEMENT:
                pending.append(child)
				462
				463
				464	def fixup_table_structures(doc):
				465	# must be done after remap_element_names(), or the tables won't be found
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	466	for table in find_all_elements(doc, "table"):
				467	fixup_table(doc, table)
				468
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	469
				470	def fixup_table(doc, table):
				471	# create the table head
				472	thead = doc.createElement("thead")
				473	row = doc.createElement("row")
				474	move_elements_by_name(doc, table, row, "entry")
				475	thead.appendChild(doc.createTextNode("\n "))
				476	thead.appendChild(row)
				477	thead.appendChild(doc.createTextNode("\n "))
				478	# create the table body
				479	tbody = doc.createElement("tbody")
				480	prev_row = None
				481	last_was_hline = 0
				482	children = table.childNodes
				483	for child in children:
				484	if child.nodeType == xml.dom.core.ELEMENT:
				485	tagName = child.tagName
				486	if tagName == "hline" and prev_row is not None:
				487	prev_row.setAttribute("rowsep", "1")
				488	elif tagName == "row":
				489	prev_row = child
				490	# save the rows:
				491	tbody.appendChild(doc.createTextNode("\n "))
				492	move_elements_by_name(doc, table, tbody, "row", sep="\n ")
				493	# and toss the rest:
				494	while children:
				495	child = children[0]
				496	nodeType = child.nodeType
				497	if nodeType == xml.dom.core.TEXT:
				498	if string.strip(child.data):
				499	raise ConversionError("unexpected free data in table")
				500	table.removeChild(child)
				501	continue
				502	if nodeType == xml.dom.core.ELEMENT:
				503	if child.tagName != "hline":
				504	raise ConversionError(
				505	"unexpected <%s> in table" % child.tagName)
				506	table.removeChild(child)
				507	continue
				508	raise ConversionError(
				509	"unexpected %s node in table" % child.__class__.__name__)
				510	# nothing left in the <table>; add the <thead> and <tbody>
				511	tgroup = doc.createElement("tgroup")
				512	tgroup.appendChild(doc.createTextNode("\n "))
				513	tgroup.appendChild(thead)
				514	tgroup.appendChild(doc.createTextNode("\n "))
				515	tgroup.appendChild(tbody)
				516	tgroup.appendChild(doc.createTextNode("\n "))
				517	table.appendChild(tgroup)
				518	# now make the <entry>s look nice:
				519	for row in table.getElementsByTagName("row"):
				520	fixup_row(doc, row)
				521
				522
				523	def fixup_row(doc, row):
				524	entries = []
				525	map(entries.append, row.childNodes[1:])
				526	for entry in entries:
				527	row.insertBefore(doc.createTextNode("\n "), entry)
				528	# row.appendChild(doc.createTextNode("\n "))
				529
				530
				531	def move_elements_by_name(doc, source, dest, name, sep=None):
				532	nodes = []
				533	for child in source.childNodes:
				534	if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
				535	nodes.append(child)
				536	for node in nodes:
				537	source.removeChild(node)
				538	dest.appendChild(node)
				539	if sep:
				540	dest.appendChild(doc.createTextNode(sep))
				541
				542
# Elements whose children are themselves scanned for paragraph material.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter",
    "abstract",
    "enumerate",
    "section",
    "subsection",
    "subsubsection",
    "paragraph",
    "subparagraph",
    "howto",
    "manual",
    )

# Elements that are skipped over when looking for paragraph content and
# that, together with the containers above, end a paragraph being built.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo",
    "title",
    "verbatim",
    "enumerate",
    "item",
    "opcodedesc",
    "classdesc",
    "datadesc",
    "funcdesc",
    "methoddesc",
    "excdesc",
    "funcdescni",
    "methoddescni",
    "excdescni",
    "tableii",
    "tableiii",
    "tableiv",
    "localmoduletable",
    "sectionauthor",
    "seealso",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )

# Elements that are skipped over when looking for where a paragraph starts.
PARA_LEVEL_PRECEEDERS = (
    "index",
    "indexii",
    "indexiii",
    "indexiv",
    "setindexsubitem",
    "stindex",
    "obindex",
    "COMMENT",
    "label",
    "input",
    "title",
    )
				565
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	566
def fixup_paras(doc):
    """Build paragraphs in the top-level containers and all <description>s.

    fixup_paras_helper() is run first on each direct child of doc named in
    RECURSE_INTO_PARA_CONTAINERS, then on every <description> found
    anywhere in doc.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, child)
    for description in find_all_elements(doc, "description"):
        fixup_paras_helper(doc, description)
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	576
				577
def fixup_paras_helper(doc, container, depth=0):
    """Wrap loose paragraph material inside container in <para> elements.

    The children are walked with skip_leading_nodes().  A child named in
    RECURSE_INTO_PARA_CONTAINERS is processed recursively; anything else
    found is handed to build_para().
    """
    # document is already normalized
    children = container.childNodes
    start = 0
    while start < len(children):
        start = skip_leading_nodes(children, start)
        if start >= len(children):
            break
        #
        # Either paragraph material or something to recurse into:
        #
        candidate = children[start]
        if candidate.nodeType == xml.dom.core.ELEMENT \
           and candidate.tagName in RECURSE_INTO_PARA_CONTAINERS:
            fixup_paras_helper(doc, candidate)
            start = skip_leading_nodes(children, start + 1)
        else:
            #
            # paragraph material:
            #
            build_para(doc, container, start, len(children))
            if DEBUG_PARA_FIXER and depth == 10:
                sys.exit(1)
            start = start + 1
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	601
				602
				603	def build_para(doc, parent, start, i):
				604	children = parent.childNodes
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	605	after = start + 1
				606	have_last = 0
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	607	BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	608	# Collect all children until \n\n+ is found in a text node or a
				609	# member of BREAK_ELEMENTS is found.
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	610	for j in range(start, i):
				611	after = j + 1
				612	child = children[j]
				613	nodeType = child.nodeType
				614	if nodeType == xml.dom.core.ELEMENT:
				615	if child.tagName in BREAK_ELEMENTS:
				616	after = j
				617	break
				618	elif nodeType == xml.dom.core.TEXT:
				619	pos = string.find(child.data, "\n\n")
				620	if pos == 0:
				621	after = j
				622	break
				623	if pos >= 1:
				624	child.splitText(pos)
				625	break
				626	else:
				627	have_last = 1
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	628	if (start + 1) > after:
				629	raise ConversionError(
				630	"build_para() could not identify content to turn into a paragraph")
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	631	if children[after - 1].nodeType == xml.dom.core.TEXT:
				632	# we may need to split off trailing white space:
				633	child = children[after - 1]
				634	data = child.data
				635	if string.rstrip(data) != data:
				636	have_last = 0
				637	child.splitText(len(string.rstrip(data)))
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	638	para = doc.createElement("para")
				639	prev = None
				640	indexes = range(start, after)
				641	indexes.reverse()
				642	for j in indexes:
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	643	node = parent.childNodes[j]
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	644	parent.removeChild(node)
				645	para.insertBefore(node, prev)
				646	prev = node
				647	if have_last:
				648	parent.appendChild(para)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	649	return len(parent.childNodes)
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	650	else:
				651	parent.insertBefore(para, parent.childNodes[start])
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	652	return start + 1
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	653
				654
def skip_leading_nodes(children, start):
    """Return index into children of a node at which paragraph building should
    begin or a recursive call to fixup_paras_helper() should be made (for
    subsections, etc.).

    When the return value >= len(children), we've built all the paras we can
    from this list of children.
    """
    limit = len(children)
    while start < limit:
        # skip over leading comments and whitespace:
        child = children[start]
        kind = child.nodeType
        if kind == xml.dom.core.TEXT:
            data = child.data
            shortened = string.lstrip(data)
            if shortened:
                if data == shortened:
                    return start
                # break into two nodes: whitespace and non-whitespace
                child.splitText(len(data) - len(shortened))
                return start + 1
            # all whitespace, just skip
        elif kind == xml.dom.core.ELEMENT:
            tagName = child.tagName
            if tagName in RECURSE_INTO_PARA_CONTAINERS \
               or tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
                return start
        start = start + 1
    return start
Fred Drake	fba0ba2	1998-12-10 05:07:09 +0000	[diff] [blame]	686
				687
def fixup_rfc_references(doc):
    """Append the text "RFC <num>" to every <rfc> element in doc."""
    for rfcnode in find_all_elements(doc, "rfc"):
        label = "RFC " + rfcnode.getAttribute("num")
        rfcnode.appendChild(doc.createTextNode(label))
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	692
				693
				694	def fixup_signatures(doc):
				695	for child in doc.childNodes:
				696	if child.nodeType == xml.dom.core.ELEMENT:
				697	args = child.getElementsByTagName("args")
				698	for arg in args:
				699	fixup_args(doc, arg)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	700	arg.normalize()
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	701	args = child.getElementsByTagName("constructor-args")
				702	for arg in args:
				703	fixup_args(doc, arg)
				704	arg.normalize()
				705
				706
				707	def fixup_args(doc, arglist):
				708	for child in arglist.childNodes:
				709	if child.nodeType == xml.dom.core.ELEMENT \
				710	and child.tagName == "optional":
				711	# found it; fix and return
				712	arglist.insertBefore(doc.createTextNode("["), child)
				713	optkids = child.childNodes
				714	while optkids:
				715	k = optkids[0]
				716	child.removeChild(k)
				717	arglist.insertBefore(k, child)
				718	arglist.insertBefore(doc.createTextNode("]"), child)
				719	arglist.removeChild(child)
				720	return fixup_args(doc, arglist)
				721
				722
def fixup_sectionauthors(doc):
    """Turn each <sectionauthor> into an <author> near the section start.

    The element is renamed and its "name" attribute becomes its text
    content.  It is re-inserted before the parent's third child, or before
    the first child when the second child is an element other than <title>.
    """
    for author in find_all_elements(doc, "sectionauthor"):
        section = author.parentNode
        section.removeChild(author)
        author._node.name = "author"
        author_name = author.getAttribute("name")
        author.appendChild(doc.createTextNode(author_name))
        author.removeAttribute("name")
        after = section.childNodes[2]
        title = section.childNodes[1]
        if title.nodeType == xml.dom.core.ELEMENT and title.tagName != "title":
            after = section.childNodes[0]
        section.insertBefore(doc.createTextNode("\n "), after)
        section.insertBefore(author, after)
				737
				738
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	739	_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	740
def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS event lines.

    doc        -- node whose childNodes are written (recursively)
    ofp        -- object with a write() method receiving the output
    knownempty -- callable taking a tag name; a true result means the
                  element is declared empty

    Text nodes produce a "-" line.  Elements produce an optional "e" line
    (declared-empty elements), one "A" line per attribute, a "(" line,
    the element's own children, and a ")" line.  Attribute values that
    match _token_rx are labelled TOKEN, all others CDATA.

    Raises ValueError if a declared-empty element has children, and
    RuntimeError for a node that is neither an element nor text.
    """
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
            continue
        if kind != xml.dom.core.ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        tag = node.tagName
        if knownempty(tag):
            if node.hasChildNodes():
                raise ValueError("declared-empty node has children")
            ofp.write("e\n")
        for name, attr in node.attributes.items():
            text = attr.value
            dtype = "CDATA"
            if _token_rx.match(text):
                dtype = "TOKEN"
            ofp.write("A%s %s %s\n" % (name, dtype, esistools.encode(text)))
        ofp.write("(%s\n" % tag)
        write_esis(node, ofp, knownempty)
        ofp.write(")%s\n" % tag)
				764
				765
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	766	def convert(ifp, ofp):
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	767	p = esistools.ExtendedEsisBuilder()
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	768	p.feed(ifp.read())
				769	doc = p.document
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	770	normalize(doc)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	771	simplify(doc)
				772	handle_labels(doc)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	773	handle_appendix(doc)
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	774	fixup_trailing_whitespace(doc, {
				775	"abstract": "\n",
				776	"title": "",
				777	"chapter": "\n\n",
				778	"section": "\n\n",
				779	"subsection": "\n\n",
				780	"subsubsection": "\n\n",
				781	"paragraph": "\n\n",
				782	"subparagraph": "\n\n",
				783	})
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	784	cleanup_root_text(doc)
Fred Drake	1ff6db4	1998-11-23 23:10:35 +0000	[diff] [blame]	785	cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
Fred Drake	fba0ba2	1998-12-10 05:07:09 +0000	[diff] [blame]	786	cleanup_synopses(doc)
Fred Drake	cb65781	1999-01-29 20:55:07 +0000	[diff] [blame]	787	fixup_descriptors(doc)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	788	normalize(doc)
				789	fixup_paras(doc)
Fred Drake	7dab6af	1999-01-28 23:59:58 +0000	[diff] [blame]	790	fixup_sectionauthors(doc)
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	791	remap_element_names(doc, {
				792	"tableii": ("table", {"cols": "2"}),
				793	"tableiii": ("table", {"cols": "3"}),
				794	"tableiv": ("table", {"cols": "4"}),
				795	"lineii": ("row", {}),
				796	"lineiii": ("row", {}),
				797	"lineiv": ("row", {}),
Fred Drake	d6ced7d	1999-01-19 17:11:23 +0000	[diff] [blame]	798	"refmodule": ("module", {"link": "link"}),
Fred Drake	f8ebb55	1999-01-14 19:45:38 +0000	[diff] [blame]	799	})
				800	fixup_table_structures(doc)
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	801	fixup_rfc_references(doc)
				802	fixup_signatures(doc)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	803	#
				804	d = {}
				805	for gi in p.get_empties():
				806	d[gi] = gi
Fred Drake	d24167b	1999-01-14 21:18:03 +0000	[diff] [blame]	807	if d.has_key("rfc"):
				808	del d["rfc"]
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	809	knownempty = d.has_key
				810	#
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	811	try:
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	812	write_esis(doc, ofp, knownempty)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	813	except IOError, (err, msg):
				814	# Ignore EPIPE; it just means that whoever we're writing to stopped
				815	# reading. The rest of the output would be ignored. All other errors
				816	# should still be reported,
				817	if err != errno.EPIPE:
				818	raise
				819
				820
				821	def main():
				822	if len(sys.argv) == 1:
				823	ifp = sys.stdin
				824	ofp = sys.stdout
				825	elif len(sys.argv) == 2:
				826	ifp = open(sys.argv[1])
				827	ofp = sys.stdout
				828	elif len(sys.argv) == 3:
				829	ifp = open(sys.argv[1])
				830	ofp = open(sys.argv[2], "w")
				831	else:
				832	usage()
				833	sys.exit(2)
				834	convert(ifp, ofp)
				835
				836
				837	if __name__ == "__main__":
				838	main()