Blame - Doc/tools/sgmlconv/docfixer.py - platform/external/python/cpython3

blob: 9628e30045a46a3f0814280f39a880dd55910540 [file] [log] [blame]

Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	"""Promote the IDs from <label/> elements to the enclosing section / chapter /
				4	whatever, then remove the <label/> elements. This allows *ML style internal
				5	linking rather than the bogus LaTeX model.
				6
				7	Note that <label/>s in <title> elements are promoted two steps, since the
				8	<title> elements are artificially created from the section parameter, and the
				9	label really refers to the sectioning construct.
				10	"""
				11	__version__ = '$Revision$'
				12
				13
				14	import errno
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	15	import esistools
				16	import re
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	17	import string
				18	import sys
				19	import xml.dom.core
				20	import xml.dom.esis_builder
				21
				22
class ConversionError(Exception):
    """Raised when the document cannot be converted.

    In this file it is raised by fixup_table() when a table contains
    unexpected text, elements, or other node types.
    """
    pass
				25
				26
# When true, the paragraph fixer writes progress messages to sys.stderr.
DEBUG_PARA_FIXER = 0
				28
				29
# Workaround to deal with invalid documents (multiple root elements).  This
# does not indicate a bug in the DOM implementation.
#
def get_documentElement(self):
    """Return the last element child of the document, or None if none.

    Every element child is wrapped in turn and the final one is kept, so a
    document with several root elements yields the last of them.
    """
    docelem = None
    for n in self._node.children:
        if n.type == xml.dom.core.ELEMENT:
            docelem = xml.dom.core.Element(n, self, self)
    return docelem

# Install the replacement on the DOM's Document class.
xml.dom.core.Document.get_documentElement = get_documentElement
				41
				42
# Replace get_childNodes for the Document class; without this, children
# accessed from the Document object via .childNodes (no matter how many
# levels of access are used) will be given an ownerDocument of None.
#
def get_childNodes(self):
    """Return a NodeList over the document's children, owned by `self`."""
    return xml.dom.core.NodeList(self._node.children, self, self)

# Install the replacement on the DOM's Document class.
xml.dom.core.Document.get_childNodes = get_childNodes
				51
				52
				53	def get_first_element(doc, gi):
				54	for n in doc.childNodes:
				55	if n.nodeType == xml.dom.core.ELEMENT and n.tagName == gi:
				56	return n
				57
				58	def extract_first_element(doc, gi):
				59	node = get_first_element(doc, gi)
				60	if node is not None:
				61	doc.removeChild(node)
				62	return node
				63
				64
				65	def simplify(doc):
				66	# Try to rationalize the document a bit, since these things are simply
				67	# not valid SGML/XML documents as they stand, and need a little work.
				68	documentclass = "document"
				69	inputs = []
				70	node = extract_first_element(doc, "documentclass")
				71	if node is not None:
				72	documentclass = node.getAttribute("classname")
				73	node = extract_first_element(doc, "title")
				74	if node is not None:
				75	inputs.append(node)
				76	# update the name of the root element
				77	node = get_first_element(doc, "document")
				78	if node is not None:
				79	node._node.name = documentclass
				80	while 1:
				81	node = extract_first_element(doc, "input")
				82	if node is None:
				83	break
				84	inputs.append(node)
				85	if inputs:
				86	docelem = doc.documentElement
				87	inputs.reverse()
				88	for node in inputs:
				89	text = doc.createTextNode("\n")
				90	docelem.insertBefore(text, docelem.firstChild)
				91	docelem.insertBefore(node, text)
				92	docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
				93	while doc.firstChild.nodeType == xml.dom.core.TEXT:
				94	doc.removeChild(doc.firstChild)
				95
				96
				97	def cleanup_root_text(doc):
				98	discards = []
				99	skip = 0
				100	for n in doc.childNodes:
				101	prevskip = skip
				102	skip = 0
				103	if n.nodeType == xml.dom.core.TEXT and not prevskip:
				104	discards.append(n)
Fred Drake	4db5b46	1998-12-01 19:03:01 +0000	[diff] [blame]	105	elif n.nodeType == xml.dom.core.ELEMENT and n.tagName == "COMMENT":
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	106	skip = 1
				107	for node in discards:
				108	doc.removeChild(node)
				109
				110
				111	def rewrite_desc_entries(doc, argname_gi):
				112	argnodes = doc.getElementsByTagName(argname_gi)
				113	for node in argnodes:
				114	parent = node.parentNode
				115	nodes = []
				116	for n in parent.childNodes:
				117	if n.nodeType != xml.dom.core.ELEMENT or n.tagName != argname_gi:
				118	nodes.append(n)
				119	desc = doc.createElement("description")
				120	for n in nodes:
				121	parent.removeChild(n)
				122	desc.appendChild(n)
				123	if node.childNodes:
				124	# keep the <args>...</args>, newline & indent
				125	parent.insertBefore(doc.createText("\n "), node)
				126	else:
				127	# no arguments, remove the <args/> node
				128	parent.removeChild(node)
				129	parent.appendChild(doc.createText("\n "))
				130	parent.appendChild(desc)
				131	parent.appendChild(doc.createText("\n"))
				132
				133	def handle_args(doc):
				134	rewrite_desc_entries(doc, "args")
				135	rewrite_desc_entries(doc, "constructor-args")
				136
				137
def handle_appendix(doc):
    """Move everything after the <appendix> marker into <back-matter>.

    The children of the document element are scanned in order.  The first
    element child containing an <appendix> descendant has that marker
    removed; every later child of the document element is then moved into
    a new <back-matter> element appended to the document element.
    """
    # must be called after simplify() if document is multi-rooted to begin
    # with
    docelem = doc.documentElement
    appendices = 0
    nodes = []
    for node in docelem.childNodes:
        if appendices:
            nodes.append(node)
        elif node.nodeType == xml.dom.core.ELEMENT:
            appnodes = node.getElementsByTagName("appendix")
            if appnodes:
                appendices = 1
                parent = appnodes[0].parentNode
                parent.removeChild(appnodes[0])
                parent.normalize()
    if nodes:
        # Explicit loops: these calls are made purely for their side
        # effects, which a lazily evaluated map() would never perform.
        for node in nodes:
            docelem.removeChild(node)
        docelem.appendChild(doc.createTextNode("\n\n\n"))
        back = doc.createElement("back-matter")
        docelem.appendChild(back)
        back.appendChild(doc.createTextNode("\n"))
        # Drop leading whitespace-only text from the material being moved.
        while nodes and nodes[0].nodeType == xml.dom.core.TEXT \
              and not string.strip(nodes[0].data):
            del nodes[0]
        for node in nodes:
            back.appendChild(node)
        docelem.appendChild(doc.createTextNode("\n"))
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	165
				166
				167	def handle_labels(doc):
Fred Drake	2664db9	1999-01-19 21:46:48 +0000	[diff] [blame]	168	for node in doc.childNodes:
				169	if node.nodeType == xml.dom.core.ELEMENT:
				170	labels = node.getElementsByTagName("label")
				171	for label in labels:
				172	id = label.getAttribute("id")
				173	if not id:
				174	continue
				175	parent = label.parentNode
				176	if parent.tagName == "title":
				177	parent.parentNode.setAttribute("id", id)
				178	else:
				179	parent.setAttribute("id", id)
				180	# now, remove <label id="..."/> from parent:
				181	parent.removeChild(label)
Fred Drake	0320473	1998-11-23 17:02:03 +0000	[diff] [blame]	182
				183
def fixup_trailing_whitespace(doc, wsmap):
    """Give the listed elements a fixed run of trailing whitespace.

    `wsmap` maps a tag name to the whitespace that should end the content
    of such an element.  When the last child of a matching element is a
    text node, its trailing whitespace is replaced by that string.  The
    tree is walked breadth-first starting at `doc`.
    """
    queue = [doc]
    while queue:
        node = queue[0]
        del queue[0]
        if node.nodeType == xml.dom.core.ELEMENT \
           and wsmap.has_key(node.tagName):
            ws = wsmap[node.tagName]
            children = node.childNodes
            # Look at the last child directly; an element without children
            # simply has nothing to adjust.
            if children:
                last = children[-1]
                if last.nodeType == xml.dom.core.TEXT:
                    last.data = string.rstrip(last.data) + ws
            # hack to get the title in place:
            if node.tagName == "title" \
               and node.parentNode.firstChild.nodeType == xml.dom.core.ELEMENT:
                node.parentNode.insertBefore(doc.createTextNode("\n "),
                                             node.parentNode.firstChild)
        for child in node.childNodes:
            if child.nodeType == xml.dom.core.ELEMENT:
                queue.append(child)
				206
				207
				208	def normalize(doc):
				209	for node in doc.childNodes:
				210	if node.nodeType == xml.dom.core.ELEMENT:
				211	node.normalize()
				212
				213
				214	def cleanup_trailing_parens(doc, element_names):
				215	d = {}
				216	for gi in element_names:
				217	d[gi] = gi
				218	rewrite_element = d.has_key
				219	queue = []
				220	for node in doc.childNodes:
				221	if node.nodeType == xml.dom.core.ELEMENT:
				222	queue.append(node)
				223	while queue:
				224	node = queue[0]
				225	del queue[0]
				226	if rewrite_element(node.tagName):
				227	children = node.childNodes
				228	if len(children) == 1 \
				229	and children[0].nodeType == xml.dom.core.TEXT:
				230	data = children[0].data
				231	if data[-2:] == "()":
				232	children[0].data = data[:-2]
				233	else:
				234	for child in node.childNodes:
				235	if child.nodeType == xml.dom.core.ELEMENT:
				236	queue.append(child)
				237
				238
def contents_match(left, right):
    """Return 1 if `left` and `right` have equivalent children, else 0.

    Children are compared pairwise: elements must share a tag name and
    match recursively, text nodes must carry equal data.  Attributes are
    not compared, and any other node type counts as a mismatch.
    """
    left_kids = left.childNodes
    right_kids = right.childNodes
    count = len(left_kids)
    if count != len(right_kids):
        return 0
    for index in range(count):
        l = left_kids[index]
        r = right_kids[index]
        kind = l.nodeType
        if kind != r.nodeType:
            return 0
        if kind == xml.dom.core.ELEMENT:
            # should check attributes, but that's not a problem here
            if l.tagName != r.tagName or not contents_match(l, r):
                return 0
        elif kind == xml.dom.core.TEXT:
            if l.data != r.data:
                return 0
        else:
            # not quite right, but good enough
            return 0
    return 1
				261
				262
				263	def create_module_info(doc, section):
				264	# Heavy.
				265	node = extract_first_element(section, "modulesynopsis")
				266	if node is None:
				267	return
				268	node._node.name = "synopsis"
				269	lastchild = node.childNodes[-1]
				270	if lastchild.nodeType == xml.dom.core.TEXT \
				271	and lastchild.data[-1:] == ".":
				272	lastchild.data = lastchild.data[:-1]
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	273	modauthor = extract_first_element(section, "moduleauthor")
				274	if modauthor:
				275	modauthor._node.name = "author"
				276	modauthor.appendChild(doc.createTextNode(
				277	modauthor.getAttribute("name")))
				278	modauthor.removeAttribute("name")
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	279	if section.tagName == "section":
				280	modinfo_pos = 2
				281	modinfo = doc.createElement("moduleinfo")
				282	moddecl = extract_first_element(section, "declaremodule")
				283	name = None
				284	if moddecl:
				285	modinfo.appendChild(doc.createTextNode("\n "))
				286	name = moddecl.attributes["name"].value
				287	namenode = doc.createElement("name")
				288	namenode.appendChild(doc.createTextNode(name))
				289	modinfo.appendChild(namenode)
				290	type = moddecl.attributes.get("type")
				291	if type:
				292	type = type.value
				293	modinfo.appendChild(doc.createTextNode("\n "))
				294	typenode = doc.createElement("type")
				295	typenode.appendChild(doc.createTextNode(type))
				296	modinfo.appendChild(typenode)
				297	title = get_first_element(section, "title")
				298	if title:
				299	children = title.childNodes
				300	if len(children) >= 2 \
				301	and children[0].nodeType == xml.dom.core.ELEMENT \
				302	and children[0].tagName == "module" \
				303	and children[0].childNodes[0].data == name:
				304	# this is it; morph the <title> into <short-synopsis>
				305	first_data = children[1]
				306	if first_data.data[:4] == " ---":
				307	first_data.data = string.lstrip(first_data.data[4:])
				308	title._node.name = "short-synopsis"
				309	if children[-1].data[-1:] == ".":
				310	children[-1].data = children[-1].data[:-1]
				311	section.removeChild(title)
				312	section.removeChild(section.childNodes[0])
				313	title.removeChild(children[0])
				314	modinfo_pos = 0
				315	else:
				316	sys.stderr.write(
				317	"module name in title doesn't match"
				318	" <declaremodule>; no <short-synopsis>\n")
				319	else:
				320	sys.stderr.write(
				321	"Unexpected condition: <section> without <title>\n")
				322	modinfo.appendChild(doc.createTextNode("\n "))
				323	modinfo.appendChild(node)
				324	if title and not contents_match(title, node):
				325	# The short synopsis is actually different,
				326	# and needs to be stored:
				327	modinfo.appendChild(doc.createTextNode("\n "))
				328	modinfo.appendChild(title)
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	329	if modauthor:
				330	modinfo.appendChild(doc.createTextNode("\n "))
				331	modinfo.appendChild(modauthor)
Fred Drake	aaed971	1998-12-10 20:25:30 +0000	[diff] [blame]	332	modinfo.appendChild(doc.createTextNode("\n "))
				333	section.insertBefore(modinfo, section.childNodes[modinfo_pos])
				334	section.insertBefore(doc.createTextNode("\n "), modinfo)
				335
				336
def cleanup_synopses(doc):
    """Build module info for every root-level <section> element."""
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            continue
        if child.tagName == "section":
            create_module_info(doc, child)
				342
				343
def remap_element_names(root, name_map):
    """Rename elements below `root` according to `name_map`.

    `name_map` maps an old tag name to a (new name, attributes) pair; the
    attributes are set on every renamed element.  All element descendants
    of `root` are visited.
    """
    stack = []
    for kid in root.childNodes:
        if kid.nodeType == xml.dom.core.ELEMENT:
            stack.append(kid)
    while stack:
        element = stack.pop()
        old_name = element.tagName
        if name_map.has_key(old_name):
            new_name, extra_attrs = name_map[old_name]
            element._node.name = new_name
            for attr_name, attr_value in extra_attrs.items():
                element.setAttribute(attr_name, attr_value)
        for kid in element.childNodes:
            if kid.nodeType == xml.dom.core.ELEMENT:
                stack.append(kid)
				360
				361
				362	def fixup_table_structures(doc):
				363	# must be done after remap_element_names(), or the tables won't be found
				364	for child in doc.childNodes:
				365	if child.nodeType == xml.dom.core.ELEMENT:
				366	tables = child.getElementsByTagName("table")
				367	for table in tables:
				368	fixup_table(doc, table)
				369
				370	def fixup_table(doc, table):
				371	# create the table head
				372	thead = doc.createElement("thead")
				373	row = doc.createElement("row")
				374	move_elements_by_name(doc, table, row, "entry")
				375	thead.appendChild(doc.createTextNode("\n "))
				376	thead.appendChild(row)
				377	thead.appendChild(doc.createTextNode("\n "))
				378	# create the table body
				379	tbody = doc.createElement("tbody")
				380	prev_row = None
				381	last_was_hline = 0
				382	children = table.childNodes
				383	for child in children:
				384	if child.nodeType == xml.dom.core.ELEMENT:
				385	tagName = child.tagName
				386	if tagName == "hline" and prev_row is not None:
				387	prev_row.setAttribute("rowsep", "1")
				388	elif tagName == "row":
				389	prev_row = child
				390	# save the rows:
				391	tbody.appendChild(doc.createTextNode("\n "))
				392	move_elements_by_name(doc, table, tbody, "row", sep="\n ")
				393	# and toss the rest:
				394	while children:
				395	child = children[0]
				396	nodeType = child.nodeType
				397	if nodeType == xml.dom.core.TEXT:
				398	if string.strip(child.data):
				399	raise ConversionError("unexpected free data in table")
				400	table.removeChild(child)
				401	continue
				402	if nodeType == xml.dom.core.ELEMENT:
				403	if child.tagName != "hline":
				404	raise ConversionError(
				405	"unexpected <%s> in table" % child.tagName)
				406	table.removeChild(child)
				407	continue
				408	raise ConversionError(
				409	"unexpected %s node in table" % child.__class__.__name__)
				410	# nothing left in the <table>; add the <thead> and <tbody>
				411	tgroup = doc.createElement("tgroup")
				412	tgroup.appendChild(doc.createTextNode("\n "))
				413	tgroup.appendChild(thead)
				414	tgroup.appendChild(doc.createTextNode("\n "))
				415	tgroup.appendChild(tbody)
				416	tgroup.appendChild(doc.createTextNode("\n "))
				417	table.appendChild(tgroup)
				418	# now make the <entry>s look nice:
				419	for row in table.getElementsByTagName("row"):
				420	fixup_row(doc, row)
				421
				422
				423	def fixup_row(doc, row):
				424	entries = []
				425	map(entries.append, row.childNodes[1:])
				426	for entry in entries:
				427	row.insertBefore(doc.createTextNode("\n "), entry)
				428	# row.appendChild(doc.createTextNode("\n "))
				429
				430
				431	def move_elements_by_name(doc, source, dest, name, sep=None):
				432	nodes = []
				433	for child in source.childNodes:
				434	if child.nodeType == xml.dom.core.ELEMENT and child.tagName == name:
				435	nodes.append(child)
				436	for node in nodes:
				437	source.removeChild(node)
				438	dest.appendChild(node)
				439	if sep:
				440	dest.appendChild(doc.createTextNode(sep))
				441
				442
# Root-level elements whose content fixup_paras() rewrites into <para>
# elements.  build_para() also ends a paragraph when it meets one of these.
FIXUP_PARA_ELEMENTS = (
    "chapter",
    "section", "subsection", "subsubsection",
    "paragraph", "subparagraph", "description",
    "opcodedesc", "classdesc",
    "funcdesc", "methoddesc", "excdesc", "datadesc",
    "funcdescni", "methoddescni", "excdescni", "datadescni",
    )

# Elements that are never wrapped in a <para>: skip_leading_nodes() steps
# over them and build_para() ends a paragraph in front of them.
PARA_LEVEL_ELEMENTS = (
    "moduleinfo", "title", "verbatim",
    "opcodedesc", "classdesc",
    "funcdesc", "methoddesc", "excdesc", "datadesc",
    "funcdescni", "methoddescni", "excdescni", "datadescni",
    "tableii", "tableiii", "tableiv", "localmoduletable",
    "sectionauthor",
    # include <para>, so we can just do it again to get subsequent paras:
    "para",
    )

# Elements that skip_leading_nodes() also steps over when looking for the
# start of the next paragraph.
PARA_LEVEL_PRECEEDERS = (
    "index", "indexii", "indexiii", "indexiv",
    "stindex", "obindex", "COMMENT", "label",
    )
				467
def fixup_paras(doc):
    """Wrap loose paragraph material in <para> elements.

    Each root-level element named in FIXUP_PARA_ELEMENTS has its own
    children fixed up.  Every <description> element found below any
    root-level element is fixed up as well.
    """
    for child in doc.childNodes:
        if child.nodeType != xml.dom.core.ELEMENT:
            # Text and other nodes cannot be searched for descriptions.
            continue
        if child.tagName in FIXUP_PARA_ELEMENTS:
            fixup_paras_helper(doc, child)
        # Look below each root element explicitly rather than through
        # whatever the loop variable was last bound to.
        for description in child.getElementsByTagName("description"):
            if DEBUG_PARA_FIXER:
                sys.stderr.write("-- Fixing up <description> element...\n")
            fixup_paras_helper(doc, description)
				478
				479
				480	def fixup_paras_helper(doc, container):
				481	# document is already normalized
				482	children = container.childNodes
				483	start = 0
				484	start_fixed = 0
Fred Drake	4259f0d	1999-01-19 23:09:31 +0000	[diff] [blame]	485	i = len(children)
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	486	SKIP_ELEMENTS = PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	487	if DEBUG_PARA_FIXER:
				488	sys.stderr.write("fixup_paras_helper() called on <%s>; %d, %d\n"
				489	% (container.tagName, start, i))
				490	if i > start:
				491	# the first [start:i] children shoudl be rewritten as <para> elements
				492	# start by breaking text nodes that contain \n\n+ into multiple nodes
				493	nstart, i = skip_leading_nodes(container.childNodes, start, i)
				494	if i > nstart:
				495	build_para(doc, container, nstart, i)
				496	fixup_paras_helper(doc, container)
				497
				498
def build_para(doc, parent, start, i):
    """Wrap a run of `parent`'s children, beginning at `start`, in a <para>.

    Children are collected from index `start` up to (not including) `i`,
    stopping early at a paragraph-breaking element or at a blank line
    ("\\n\\n") inside a text node.  The collected nodes are moved, in
    order, into a new <para> element that is put back into `parent`.
    """
    children = parent.childNodes
    # collect all children until \n\n+ is found in a text node or a
    # PARA_LEVEL_ELEMENT is found.
    after = start + 1           # index one past the last node to wrap
    have_last = 0               # true if the run reached index i unbroken
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + FIXUP_PARA_ELEMENTS
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == xml.dom.core.ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                # the breaking element itself stays outside the paragraph
                after = j
                break
        elif nodeType == xml.dom.core.TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # the text starts with a blank line; leave it out entirely
                after = j
                break
            if pos >= 1:
                # keep the text before the blank line, split off the rest
                child.splitText(pos)
                break
    else:
        have_last = 1
    if children[after - 1].nodeType == xml.dom.core.TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    # fetch the child list again now that text nodes may have been split
    children = parent.childNodes
    para = doc.createElement("para")
    prev = None
    indexes = range(start, after)
    indexes.reverse()
    # move the nodes last-to-first, inserting each before the one moved
    # previously, so they keep their original order inside the <para>
    for j in indexes:
        node = children[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
    else:
        parent.insertBefore(para, parent.childNodes[start])
				545
				546
				547	def skip_leading_nodes(children, start, i):
				548	i = min(i, len(children))
				549	while i > start:
				550	# skip over leading comments and whitespace:
				551	try:
				552	child = children[start]
				553	except IndexError:
				554	sys.stderr.write(
				555	"skip_leading_nodes() failed at index %d\n" % start)
				556	raise
				557	nodeType = child.nodeType
				558	if nodeType == xml.dom.core.COMMENT:
				559	start = start + 1
				560	elif nodeType == xml.dom.core.TEXT:
				561	data = child.data
				562	shortened = string.lstrip(data)
				563	if shortened:
				564	if data != shortened:
				565	# break into two nodes: whitespace and non-whitespace
				566	child.splitText(len(data) - len(shortened))
				567	return start + 1, i + 1
				568	break
				569	# all whitespace, just skip
				570	start = start + 1
				571	elif nodeType == xml.dom.core.ELEMENT:
				572	if child.tagName in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
				573	start = start + 1
				574	else:
				575	break
				576	else:
				577	break
				578	return start, i
Fred Drake	fba0ba2	1998-12-10 05:07:09 +0000	[diff] [blame]	579
				580
def fixup_rfc_references(doc):
    """Append the text "RFC <num>" to every <rfc> element."""
    found = []
    for root in doc.childNodes:
        if root.nodeType != xml.dom.core.ELEMENT:
            continue
        for rfc in root.getElementsByTagName("rfc"):
            found.append(rfc)
    for rfc in found:
        label = "RFC " + rfc.getAttribute("num")
        rfc.appendChild(doc.createTextNode(label))
				591
				592
				593	def fixup_signatures(doc):
				594	for child in doc.childNodes:
				595	if child.nodeType == xml.dom.core.ELEMENT:
				596	args = child.getElementsByTagName("args")
				597	for arg in args:
				598	fixup_args(doc, arg)
				599	args = child.getElementsByTagName("constructor-args")
				600	for arg in args:
				601	fixup_args(doc, arg)
				602	arg.normalize()
				603
				604
				605	def fixup_args(doc, arglist):
				606	for child in arglist.childNodes:
				607	if child.nodeType == xml.dom.core.ELEMENT \
				608	and child.tagName == "optional":
				609	# found it; fix and return
				610	arglist.insertBefore(doc.createTextNode("["), child)
				611	optkids = child.childNodes
				612	while optkids:
				613	k = optkids[0]
				614	child.removeChild(k)
				615	arglist.insertBefore(k, child)
				616	arglist.insertBefore(doc.createTextNode("]"), child)
				617	arglist.removeChild(child)
				618	return fixup_args(doc, arglist)
				619
				620
# Matches attribute values that write_esis() emits as TOKEN rather than
# CDATA: a letter followed by letters, digits, periods or hyphens.
_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
Fred Drake	fcc5910	1999-01-06 22:50:52 +0000	[diff] [blame]	622
def write_esis(doc, ofp, knownempty):
    """Write the children of `doc` to `ofp` as ESIS event lines.

    `knownempty` is called with a tag name and returns true for elements
    declared empty; these are announced with an "e" line.  Each element
    produces its "A" attribute lines, a "(" line, its content and a ")"
    line; each text node produces a "-" line.

    Raises ValueError if a declared-empty element has children, and
    RuntimeError for any node that is neither an element nor text.
    """
    for node in doc.childNodes:
        kind = node.nodeType
        if kind == xml.dom.core.TEXT:
            ofp.write("-%s\n" % esistools.encode(node.data))
            continue
        if kind != xml.dom.core.ELEMENT:
            raise RuntimeError("unsupported node type: %s" % kind)
        gi = node.tagName
        if knownempty(gi):
            if node.hasChildNodes():
                raise ValueError("declared-empty node has children")
            ofp.write("e\n")
        for attr_name, attr in node.attributes.items():
            value = attr.value
            dtype = "CDATA"
            if _token_rx.match(value):
                dtype = "TOKEN"
            ofp.write("A%s %s %s\n"
                      % (attr_name, dtype, esistools.encode(value)))
        ofp.write("(%s\n" % gi)
        write_esis(node, ofp, knownempty)
        ofp.write(")%s\n" % gi)
				646
				647
def convert(ifp, ofp):
    """Read an ESIS stream from `ifp`, fix up the document, write to `ofp`.

    The input is parsed into a DOM document, passed through the fix-up
    functions of this module in a fixed order, and written back out as
    ESIS.  An EPIPE error while writing is ignored; any other IOError is
    re-raised.
    """
    p = esistools.ExtendedEsisBuilder()
    p.feed(ifp.read())
    doc = p.document
    normalize(doc)
    handle_args(doc)
    simplify(doc)
    handle_labels(doc)
    handle_appendix(doc)
    # tag name -> whitespace that should end the element's content
    fixup_trailing_whitespace(doc, {
        "abstract": "\n",
        "title": "",
        "chapter": "\n\n",
        "section": "\n\n",
        "subsection": "\n\n",
        "subsubsection": "\n\n",
        "paragraph": "\n\n",
        "subparagraph": "\n\n",
        })
    cleanup_root_text(doc)
    cleanup_trailing_parens(doc, ["function", "method", "cfunction"])
    cleanup_synopses(doc)
    normalize(doc)
    fixup_paras(doc)
    # old tag name -> (new tag name, attributes to set)
    remap_element_names(doc, {
        "tableii": ("table", {"cols": "2"}),
        "tableiii": ("table", {"cols": "3"}),
        "tableiv": ("table", {"cols": "4"}),
        "lineii": ("row", {}),
        "lineiii": ("row", {}),
        "lineiv": ("row", {}),
        "refmodule": ("module", {"link": "link"}),
        })
    # the tables only exist under that name once the remapping is done
    fixup_table_structures(doc)
    fixup_rfc_references(doc)
    fixup_signatures(doc)
    #
    # Build the "is this element declared empty?" test for write_esis().
    d = {}
    for gi in p.get_empties():
        d[gi] = gi
    # <rfc> elements were given text content by fixup_rfc_references(), so
    # they must not be reported as empty.
    if d.has_key("rfc"):
        del d["rfc"]
    knownempty = d.has_key
    #
    try:
        write_esis(doc, ofp, knownempty)
    except IOError, (err, msg):
        # Ignore EPIPE; it just means that whoever we're writing to stopped
        # reading.  The rest of the output would be ignored.  All other
        # errors should still be reported.
        if err != errno.EPIPE:
            raise
				700
				701
				702	def main():
				703	if len(sys.argv) == 1:
				704	ifp = sys.stdin
				705	ofp = sys.stdout
				706	elif len(sys.argv) == 2:
				707	ifp = open(sys.argv[1])
				708	ofp = sys.stdout
				709	elif len(sys.argv) == 3:
				710	ifp = open(sys.argv[1])
				711	ofp = open(sys.argv[2], "w")
				712	else:
				713	usage()
				714	sys.exit(2)
				715	convert(ifp, ofp)
				716
				717
				718	if __name__ == "__main__":
				719	main()