Blame - doc/tools/sgmlconv/docfixer.py - platform/external/python/pyopenssl

blob: 463276b0676e5aac2aebd761c4b183c5f3f2e614 [file] [log] [blame]

Jean-Paul Calderone	897bc25	2008-02-18 20:50:23 -0500	[diff] [blame]	1	#! /usr/bin/env python
				2
				3	"""Perform massive transformations on a document tree created from the LaTeX
				4	of the Python documentation, and dump the ESIS data for the transformed tree.
				5	"""
				6
				7
				8	import errno
				9	import esistools
				10	import re
				11	import string
				12	import sys
				13	import xml.dom
				14	import xml.dom.minidom
				15
# Short aliases for the DOM node-type codes that the functions in this
# module compare against node.nodeType.
ELEMENT = xml.dom.Node.ELEMENT_NODE
ENTITY_REFERENCE = xml.dom.Node.ENTITY_REFERENCE_NODE
TEXT = xml.dom.Node.TEXT_NODE
				19
				20
				21	class ConversionError(Exception):
				22	pass
				23
				24
				25	ewrite = sys.stderr.write
				26	try:
				27	# We can only do this trick on Unix (if tput is on $PATH)!
				28	if sys.platform != "posix" or not sys.stderr.isatty():
				29	raise ImportError
				30	import commands
				31	except ImportError:
				32	bwrite = ewrite
				33	else:
				34	def bwrite(s, BOLDON=commands.getoutput("tput bold"),
				35	BOLDOFF=commands.getoutput("tput sgr0")):
				36	ewrite("%s%s%s" % (BOLDON, s, BOLDOFF))
				37
				38
				39	PARA_ELEMENT = "para"
				40
				41	DEBUG_PARA_FIXER = 0
				42
				43	if DEBUG_PARA_FIXER:
				44	def para_msg(s):
				45	ewrite("*** %s\n" % s)
				46	else:
				47	def para_msg(s):
				48	pass
				49
				50
				51	def get_first_element(doc, gi):
				52	for n in doc.childNodes:
				53	if n.nodeName == gi:
				54	return n
				55
				56	def extract_first_element(doc, gi):
				57	node = get_first_element(doc, gi)
				58	if node is not None:
				59	doc.removeChild(node)
				60	return node
				61
				62
				63	def get_documentElement(node):
				64	result = None
				65	for child in node.childNodes:
				66	if child.nodeType == ELEMENT:
				67	result = child
				68	return result
				69
				70
				71	def set_tagName(elem, gi):
				72	elem.nodeName = elem.tagName = gi
				73
				74
				75	def find_all_elements(doc, gi):
				76	nodes = []
				77	if doc.nodeName == gi:
				78	nodes.append(doc)
				79	for child in doc.childNodes:
				80	if child.nodeType == ELEMENT:
				81	if child.tagName == gi:
				82	nodes.append(child)
				83	for node in child.getElementsByTagName(gi):
				84	nodes.append(node)
				85	return nodes
				86
				87	def find_all_child_elements(doc, gi):
				88	nodes = []
				89	for child in doc.childNodes:
				90	if child.nodeName == gi:
				91	nodes.append(child)
				92	return nodes
				93
				94
				95	def find_all_elements_from_set(doc, gi_set):
				96	return __find_all_elements_from_set(doc, gi_set, [])
				97
				98	def __find_all_elements_from_set(doc, gi_set, nodes):
				99	if doc.nodeName in gi_set:
				100	nodes.append(doc)
				101	for child in doc.childNodes:
				102	if child.nodeType == ELEMENT:
				103	__find_all_elements_from_set(child, gi_set, nodes)
				104	return nodes
				105
				106
				107	def simplify(doc, fragment):
				108	# Try to rationalize the document a bit, since these things are simply
				109	# not valid SGML/XML documents as they stand, and need a little work.
				110	documentclass = "document"
				111	inputs = []
				112	node = extract_first_element(fragment, "documentclass")
				113	if node is not None:
				114	documentclass = node.getAttribute("classname")
				115	node = extract_first_element(fragment, "title")
				116	if node is not None:
				117	inputs.append(node)
				118	# update the name of the root element
				119	node = get_first_element(fragment, "document")
				120	if node is not None:
				121	set_tagName(node, documentclass)
				122	while 1:
				123	node = extract_first_element(fragment, "input")
				124	if node is None:
				125	break
				126	inputs.append(node)
				127	if inputs:
				128	docelem = get_documentElement(fragment)
				129	inputs.reverse()
				130	for node in inputs:
				131	text = doc.createTextNode("\n")
				132	docelem.insertBefore(text, docelem.firstChild)
				133	docelem.insertBefore(node, text)
				134	docelem.insertBefore(doc.createTextNode("\n"), docelem.firstChild)
				135	while fragment.firstChild and fragment.firstChild.nodeType == TEXT:
				136	fragment.removeChild(fragment.firstChild)
				137
				138
				139	def cleanup_root_text(doc):
				140	discards = []
				141	skip = 0
				142	for n in doc.childNodes:
				143	prevskip = skip
				144	skip = 0
				145	if n.nodeType == TEXT and not prevskip:
				146	discards.append(n)
				147	elif n.nodeName == "COMMENT":
				148	skip = 1
				149	for node in discards:
				150	doc.removeChild(node)
				151
				152
# Tag names that find_and_fix_descriptors() hands to rewrite_descriptor().
# The names ending in "ni" are the variants rewrite_descriptor() marks with
# index="no".
DESCRIPTOR_ELEMENTS = (
    "cfuncdesc", "cvardesc", "ctypedesc",
    "classdesc", "memberdesc", "memberdescni", "methoddesc", "methoddescni",
    "excdesc", "funcdesc", "funcdescni", "opcodedesc",
    "datadesc", "datadescni",
    )
				159
				160	def fixup_descriptors(doc, fragment):
				161	sections = find_all_elements(fragment, "section")
				162	for section in sections:
				163	find_and_fix_descriptors(doc, section)
				164
				165
				166	def find_and_fix_descriptors(doc, container):
				167	children = container.childNodes
				168	for child in children:
				169	if child.nodeType == ELEMENT:
				170	tagName = child.tagName
				171	if tagName in DESCRIPTOR_ELEMENTS:
				172	rewrite_descriptor(doc, child)
				173	elif tagName == "subsection":
				174	find_and_fix_descriptors(doc, child)
				175
				176
def rewrite_descriptor(doc, descriptor):
    """Restructure descriptor, in place, into signature(s) plus description.

    doc is used only as the factory for new nodes.  When the function
    returns, descriptor's children are one or more <signature> elements
    followed by one <description> element, separated by whitespace text.

    Raises RuntimeError if a descriptor other than opcodedesc carries a
    'var' attribute.
    """
    #
    # Do these things:
    # 1. Add an "index='no'" attribute to the element if the tagName
    # ends in 'ni', removing the 'ni' from the name.
    # 2. Create a <signature> from the name attribute
    # 2a.Create an <args> if it appears to be available.
    # 3. Create additional <signature>s from <*line{,ni}> elements,
    # if found.
    # 4. If a <versionadded> is found, move it to an attribute on the
    # descriptor.
    # 5. Move remaining child nodes to a <description> element.
    # 6. Put it back together.
    #
    # 1.
    descname = descriptor.tagName
    index = 1
    if descname[-2:] == "ni":
        descname = descname[:-2]
        descriptor.setAttribute("index", "no")
        set_tagName(descriptor, descname)
        index = 0
    desctype = descname[:-4] # remove 'desc'
    # Name of the supplemental-signature element for this descriptor type,
    # e.g. "funcline" for "funcdesc" and "funclineni" for "funcdescni".
    linename = desctype + "line"
    if not index:
        linename = linename + "ni"
    # 2.
    signature = doc.createElement("signature")
    name = doc.createElement("name")
    signature.appendChild(doc.createTextNode("\n "))
    signature.appendChild(name)
    name.appendChild(doc.createTextNode(descriptor.getAttribute("name")))
    descriptor.removeAttribute("name")
    # 2a.
    if descriptor.hasAttribute("var"):
        if descname != "opcodedesc":
            raise RuntimeError, \
                  "got 'var' attribute on descriptor other than opcodedesc"
        variable = descriptor.getAttribute("var")
        if variable:
            args = doc.createElement("args")
            args.appendChild(doc.createTextNode(variable))
            signature.appendChild(doc.createTextNode("\n "))
            signature.appendChild(args)
        descriptor.removeAttribute("var")
    newchildren = [signature]
    # children is the live child list: it shrinks as nodes are removed.
    children = descriptor.childNodes
    pos = skip_leading_nodes(children)
    if pos < len(children):
        child = children[pos]
        if child.nodeName == "args":
            # move <args> to <signature>, or remove if empty:
            child.parentNode.removeChild(child)
            if len(child.childNodes):
                signature.appendChild(doc.createTextNode("\n "))
                signature.appendChild(child)
    signature.appendChild(doc.createTextNode("\n "))
    # 3, 4.
    pos = skip_leading_nodes(children, pos)
    while pos < len(children) \
          and children[pos].nodeName in (linename, "versionadded"):
        if children[pos].tagName == linename:
            # this is really a supplemental signature, create <signature>
            # A copy is taken first so the untouched element can be shown
            # if the conversion fails.
            oldchild = children[pos].cloneNode(1)
            try:
                sig = methodline_to_signature(doc, children[pos])
            except KeyError:
                print oldchild.toxml()
                raise
            newchildren.append(sig)
        else:
            # <versionadded added=...>
            descriptor.setAttribute(
                "added", children[pos].getAttribute("version"))
        pos = skip_leading_nodes(children, pos + 1)
    # 5.
    description = doc.createElement("description")
    description.appendChild(doc.createTextNode("\n"))
    newchildren.append(description)
    move_children(descriptor, description, pos)
    # description always has at least the "\n" text node added above.
    last = description.childNodes[-1]
    if last.nodeType == TEXT:
        last.data = string.rstrip(last.data) + "\n "
    # 6.
    # should have nothing but whitespace and signature lines in <descriptor>;
    # discard them
    while descriptor.childNodes:
        descriptor.removeChild(descriptor.childNodes[0])
    for node in newchildren:
        descriptor.appendChild(doc.createTextNode("\n "))
        descriptor.appendChild(node)
    descriptor.appendChild(doc.createTextNode("\n"))
				269
				270
				271	def methodline_to_signature(doc, methodline):
				272	signature = doc.createElement("signature")
				273	signature.appendChild(doc.createTextNode("\n "))
				274	name = doc.createElement("name")
				275	name.appendChild(doc.createTextNode(methodline.getAttribute("name")))
				276	methodline.removeAttribute("name")
				277	signature.appendChild(name)
				278	if len(methodline.childNodes):
				279	args = doc.createElement("args")
				280	signature.appendChild(doc.createTextNode("\n "))
				281	signature.appendChild(args)
				282	move_children(methodline, args)
				283	signature.appendChild(doc.createTextNode("\n "))
				284	return signature
				285
				286
				287	def move_children(origin, dest, start=0):
				288	children = origin.childNodes
				289	while start < len(children):
				290	node = children[start]
				291	origin.removeChild(node)
				292	dest.appendChild(node)
				293
				294
				295	def handle_appendix(doc, fragment):
				296	# must be called after simplfy() if document is multi-rooted to begin with
				297	docelem = get_documentElement(fragment)
				298	toplevel = docelem.tagName == "manual" and "chapter" or "section"
				299	appendices = 0
				300	nodes = []
				301	for node in docelem.childNodes:
				302	if appendices:
				303	nodes.append(node)
				304	elif node.nodeType == ELEMENT:
				305	appnodes = node.getElementsByTagName("appendix")
				306	if appnodes:
				307	appendices = 1
				308	parent = appnodes[0].parentNode
				309	parent.removeChild(appnodes[0])
				310	parent.normalize()
				311	if nodes:
				312	map(docelem.removeChild, nodes)
				313	docelem.appendChild(doc.createTextNode("\n\n\n"))
				314	back = doc.createElement("back-matter")
				315	docelem.appendChild(back)
				316	back.appendChild(doc.createTextNode("\n"))
				317	while nodes and nodes[0].nodeType == TEXT \
				318	and not string.strip(nodes[0].data):
				319	del nodes[0]
				320	map(back.appendChild, nodes)
				321	docelem.appendChild(doc.createTextNode("\n"))
				322
				323
				324	def handle_labels(doc, fragment):
				325	for label in find_all_elements(fragment, "label"):
				326	id = label.getAttribute("id")
				327	if not id:
				328	continue
				329	parent = label.parentNode
				330	parentTagName = parent.tagName
				331	if parentTagName == "title":
				332	parent.parentNode.setAttribute("id", id)
				333	else:
				334	parent.setAttribute("id", id)
				335	# now, remove <label id="..."/> from parent:
				336	parent.removeChild(label)
				337	if parentTagName == "title":
				338	parent.normalize()
				339	children = parent.childNodes
				340	if children[-1].nodeType == TEXT:
				341	children[-1].data = string.rstrip(children[-1].data)
				342
				343
				344	def fixup_trailing_whitespace(doc, wsmap):
				345	queue = [doc]
				346	while queue:
				347	node = queue[0]
				348	del queue[0]
				349	if wsmap.has_key(node.nodeName):
				350	ws = wsmap[node.tagName]
				351	children = node.childNodes
				352	children.reverse()
				353	if children[0].nodeType == TEXT:
				354	data = string.rstrip(children[0].data) + ws
				355	children[0].data = data
				356	children.reverse()
				357	# hack to get the title in place:
				358	if node.tagName == "title" \
				359	and node.parentNode.firstChild.nodeType == ELEMENT:
				360	node.parentNode.insertBefore(doc.createText("\n "),
				361	node.parentNode.firstChild)
				362	for child in node.childNodes:
				363	if child.nodeType == ELEMENT:
				364	queue.append(child)
				365
				366
				367	def normalize(doc):
				368	for node in doc.childNodes:
				369	if node.nodeType == ELEMENT:
				370	node.normalize()
				371
				372
				373	def cleanup_trailing_parens(doc, element_names):
				374	d = {}
				375	for gi in element_names:
				376	d[gi] = gi
				377	rewrite_element = d.has_key
				378	queue = []
				379	for node in doc.childNodes:
				380	if node.nodeType == ELEMENT:
				381	queue.append(node)
				382	while queue:
				383	node = queue[0]
				384	del queue[0]
				385	if rewrite_element(node.tagName):
				386	children = node.childNodes
				387	if len(children) == 1 \
				388	and children[0].nodeType == TEXT:
				389	data = children[0].data
				390	if data[-2:] == "()":
				391	children[0].data = data[:-2]
				392	else:
				393	for child in node.childNodes:
				394	if child.nodeType == ELEMENT:
				395	queue.append(child)
				396
				397
				398	def contents_match(left, right):
				399	left_children = left.childNodes
				400	right_children = right.childNodes
				401	if len(left_children) != len(right_children):
				402	return 0
				403	for l, r in map(None, left_children, right_children):
				404	nodeType = l.nodeType
				405	if nodeType != r.nodeType:
				406	return 0
				407	if nodeType == ELEMENT:
				408	if l.tagName != r.tagName:
				409	return 0
				410	# should check attributes, but that's not a problem here
				411	if not contents_match(l, r):
				412	return 0
				413	elif nodeType == TEXT:
				414	if l.data != r.data:
				415	return 0
				416	else:
				417	# not quite right, but good enough
				418	return 0
				419	return 1
				420
				421
def create_module_info(doc, section):
    """Collect a section's module metadata into one <moduleinfo> element.

    Does nothing unless section has a <modulesynopsis> child.  Otherwise
    the <modulesynopsis>, <moduleauthor> and <platform> children are
    removed from section and, when section is a <section> element,
    gathered together with the <declaremodule> data into a new
    <moduleinfo> element that is inserted back into section.  doc is used
    only as the factory for new nodes.
    """
    # Heavy.
    node = extract_first_element(section, "modulesynopsis")
    if node is None:
        return
    set_tagName(node, "synopsis")
    lastchild = node.childNodes[-1]
    # drop a trailing period from the synopsis text
    if lastchild.nodeType == TEXT \
       and lastchild.data[-1:] == ".":
        lastchild.data = lastchild.data[:-1]
    modauthor = extract_first_element(section, "moduleauthor")
    if modauthor:
        # <moduleauthor name="X"/> becomes <author>X</author>
        set_tagName(modauthor, "author")
        modauthor.appendChild(doc.createTextNode(
            modauthor.getAttribute("name")))
        modauthor.removeAttribute("name")
    platform = extract_first_element(section, "platform")
    if section.tagName == "section":
        # Index in section.childNodes before which <moduleinfo> is
        # inserted; reset to 0 below when the <title> is taken out.
        modinfo_pos = 2
        modinfo = doc.createElement("moduleinfo")
        moddecl = extract_first_element(section, "declaremodule")
        name = None
        if moddecl:
            modinfo.appendChild(doc.createTextNode("\n "))
            name = moddecl.attributes["name"].value
            namenode = doc.createElement("name")
            namenode.appendChild(doc.createTextNode(name))
            modinfo.appendChild(namenode)
            type = moddecl.attributes.get("type")
            if type:
                type = type.value
                modinfo.appendChild(doc.createTextNode("\n "))
                typenode = doc.createElement("type")
                typenode.appendChild(doc.createTextNode(type))
                modinfo.appendChild(typenode)
        versionadded = extract_first_element(section, "versionadded")
        if versionadded:
            modinfo.setAttribute("added", versionadded.getAttribute("version"))
        title = get_first_element(section, "title")
        if title:
            children = title.childNodes
            if len(children) >= 2 \
               and children[0].nodeName == "module" \
               and children[0].childNodes[0].data == name:
                # this is it; morph the <title> into <short-synopsis>
                first_data = children[1]
                if first_data.data[:4] == " ---":
                    first_data.data = string.lstrip(first_data.data[4:])
                set_tagName(title, "short-synopsis")
                if children[-1].nodeType == TEXT \
                   and children[-1].data[-1:] == ".":
                    children[-1].data = children[-1].data[:-1]
                section.removeChild(title)
                # also remove the node that is now first in the section
                section.removeChild(section.childNodes[0])
                # drop the leading <module> element from the old title
                title.removeChild(children[0])
                modinfo_pos = 0
            else:
                ewrite("module name in title doesn't match"
                       " <declaremodule/>; no <short-synopsis/>\n")
        else:
            ewrite("Unexpected condition: <section/> without <title/>\n")
        modinfo.appendChild(doc.createTextNode("\n "))
        modinfo.appendChild(node)
        if title and not contents_match(title, node):
            # The short synopsis is actually different,
            # and needs to be stored:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(title)
        if modauthor:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(modauthor)
        if platform:
            modinfo.appendChild(doc.createTextNode("\n "))
            modinfo.appendChild(platform)
        modinfo.appendChild(doc.createTextNode("\n "))
        section.insertBefore(modinfo, section.childNodes[modinfo_pos])
        section.insertBefore(doc.createTextNode("\n "), modinfo)
        #
        # The rest of this removes extra newlines from where we cut out
        # a lot of elements. A lot of code for minimal value, but
        # keeps the generated *ML from being too funny looking.
        #
        section.normalize()
        children = section.childNodes
        for i in range(len(children)):
            node = children[i]
            if node.nodeName == "moduleinfo":
                nextnode = children[i+1]
                if nextnode.nodeType == TEXT:
                    data = nextnode.data
                    # more than four leading whitespace characters:
                    # replace them with exactly three newlines
                    if len(string.lstrip(data)) < (len(data) - 4):
                        nextnode.data = "\n\n\n" + string.lstrip(data)
				514
				515
				516	def cleanup_synopses(doc, fragment):
				517	for node in find_all_elements(fragment, "section"):
				518	create_module_info(doc, node)
				519
				520
				521	def fixup_table_structures(doc, fragment):
				522	for table in find_all_elements(fragment, "table"):
				523	fixup_table(doc, table)
				524
				525
				526	def fixup_table(doc, table):
				527	# create the table head
				528	thead = doc.createElement("thead")
				529	row = doc.createElement("row")
				530	move_elements_by_name(doc, table, row, "entry")
				531	thead.appendChild(doc.createTextNode("\n "))
				532	thead.appendChild(row)
				533	thead.appendChild(doc.createTextNode("\n "))
				534	# create the table body
				535	tbody = doc.createElement("tbody")
				536	prev_row = None
				537	last_was_hline = 0
				538	children = table.childNodes
				539	for child in children:
				540	if child.nodeType == ELEMENT:
				541	tagName = child.tagName
				542	if tagName == "hline" and prev_row is not None:
				543	prev_row.setAttribute("rowsep", "1")
				544	elif tagName == "row":
				545	prev_row = child
				546	# save the rows:
				547	tbody.appendChild(doc.createTextNode("\n "))
				548	move_elements_by_name(doc, table, tbody, "row", sep="\n ")
				549	# and toss the rest:
				550	while children:
				551	child = children[0]
				552	nodeType = child.nodeType
				553	if nodeType == TEXT:
				554	if string.strip(child.data):
				555	raise ConversionError("unexpected free data in <%s>: %r"
				556	% (table.tagName, child.data))
				557	table.removeChild(child)
				558	continue
				559	if nodeType == ELEMENT:
				560	if child.tagName != "hline":
				561	raise ConversionError(
				562	"unexpected <%s> in table" % child.tagName)
				563	table.removeChild(child)
				564	continue
				565	raise ConversionError(
				566	"unexpected %s node in table" % child.__class__.__name__)
				567	# nothing left in the <table>; add the <thead> and <tbody>
				568	tgroup = doc.createElement("tgroup")
				569	tgroup.appendChild(doc.createTextNode("\n "))
				570	tgroup.appendChild(thead)
				571	tgroup.appendChild(doc.createTextNode("\n "))
				572	tgroup.appendChild(tbody)
				573	tgroup.appendChild(doc.createTextNode("\n "))
				574	table.appendChild(tgroup)
				575	# now make the <entry>s look nice:
				576	for row in table.getElementsByTagName("row"):
				577	fixup_row(doc, row)
				578
				579
				580	def fixup_row(doc, row):
				581	entries = []
				582	map(entries.append, row.childNodes[1:])
				583	for entry in entries:
				584	row.insertBefore(doc.createTextNode("\n "), entry)
				585	# row.appendChild(doc.createTextNode("\n "))
				586
				587
				588	def move_elements_by_name(doc, source, dest, name, sep=None):
				589	nodes = []
				590	for child in source.childNodes:
				591	if child.nodeName == name:
				592	nodes.append(child)
				593	for node in nodes:
				594	source.removeChild(node)
				595	dest.appendChild(node)
				596	if sep:
				597	dest.appendChild(doc.createTextNode(sep))
				598
				599
# Elements whose children are themselves scanned for paragraph material:
# fixup_paras_helper() recurses into these instead of wrapping them in a
# paragraph.
RECURSE_INTO_PARA_CONTAINERS = (
    "chapter", "abstract", "enumerate",
    "section", "subsection", "subsubsection",
    "paragraph", "subparagraph", "back-matter",
    "howto", "manual",
    "item", "itemize", "fulllineitems", "enumeration", "descriptionlist",
    "definitionlist", "definition",
    )
				608
				609	PARA_LEVEL_ELEMENTS = (
				610	"moduleinfo", "title", "verbatim", "enumerate", "item",
				611	"interpreter-session", "back-matter", "interactive-session",
				612	"opcodedesc", "classdesc", "datadesc",
				613	"funcdesc", "methoddesc", "excdesc", "memberdesc", "membderdescni",
				614	"funcdescni", "methoddescni", "excdescni",
				615	"tableii", "tableiii", "tableiv", "localmoduletable",
				616	"sectionauthor", "seealso", "itemize",
				617	# include <para>, so we can just do it again to get subsequent paras:
				618	PARA_ELEMENT,
				619	)
				620
# Elements that skip_leading_nodes() steps over when looking for the place
# where paragraph material begins.
PARA_LEVEL_PRECEEDERS = (
    "setindexsubitem", "author",
    "stindex", "obindex", "COMMENT", "label", "input", "title",
    "versionadded", "versionchanged", "declaremodule", "modulesynopsis",
    "moduleauthor", "indexterm", "leader",
    )
				627
				628
				629	def fixup_paras(doc, fragment):
				630	for child in fragment.childNodes:
				631	if child.nodeName in RECURSE_INTO_PARA_CONTAINERS:
				632	fixup_paras_helper(doc, child)
				633	descriptions = find_all_elements(fragment, "description")
				634	for description in descriptions:
				635	fixup_paras_helper(doc, description)
				636
				637
				638	def fixup_paras_helper(doc, container, depth=0):
				639	# document is already normalized
				640	children = container.childNodes
				641	start = skip_leading_nodes(children)
				642	while len(children) > start:
				643	if children[start].nodeName in RECURSE_INTO_PARA_CONTAINERS:
				644	# Something to recurse into:
				645	fixup_paras_helper(doc, children[start])
				646	else:
				647	# Paragraph material:
				648	build_para(doc, container, start, len(children))
				649	if DEBUG_PARA_FIXER and depth == 10:
				650	sys.exit(1)
				651	start = skip_leading_nodes(children, start + 1)
				652
				653
def build_para(doc, parent, start, i):
    """Wrap a run of parent's children in a new paragraph element.

    The run begins at index start and is looked for among the children
    with indexes start..i-1.  It ends before a blank line ("\\n\\n") in a
    text node, before an element listed in PARA_LEVEL_ELEMENTS or
    RECURSE_INTO_PARA_CONTAINERS, or at index i.  Trailing whitespace of
    the run is split off and left outside the paragraph.

    Returns an index into parent's children that lies after the new
    paragraph.  Raises ConversionError when the run would be empty.
    """
    children = parent.childNodes
    # `after` is the index just past the last child that goes in the para.
    after = start + 1
    # have_last is set when the run reaches index i without hitting a break.
    have_last = 0
    BREAK_ELEMENTS = PARA_LEVEL_ELEMENTS + RECURSE_INTO_PARA_CONTAINERS
    # Collect all children until \n\n+ is found in a text node or a
    # member of BREAK_ELEMENTS is found.
    for j in range(start, i):
        after = j + 1
        child = children[j]
        nodeType = child.nodeType
        if nodeType == ELEMENT:
            if child.tagName in BREAK_ELEMENTS:
                after = j
                break
        elif nodeType == TEXT:
            pos = string.find(child.data, "\n\n")
            if pos == 0:
                # the text starts with a blank line: it is not part of the run
                after = j
                break
            if pos >= 1:
                # keep the text before the blank line, split off the rest
                child.splitText(pos)
                break
    else:
        # the loop finished without a break
        have_last = 1
    if (start + 1) > after:
        raise ConversionError(
            "build_para() could not identify content to turn into a paragraph")
    if children[after - 1].nodeType == TEXT:
        # we may need to split off trailing white space:
        child = children[after - 1]
        data = child.data
        if string.rstrip(data) != data:
            have_last = 0
            child.splitText(len(string.rstrip(data)))
    para = doc.createElement(PARA_ELEMENT)
    prev = None
    # Move the nodes last-to-first, inserting each one before the node
    # moved previously, so they keep their order inside the paragraph.
    indexes = range(start, after)
    indexes.reverse()
    for j in indexes:
        node = parent.childNodes[j]
        parent.removeChild(node)
        para.insertBefore(node, prev)
        prev = node
    if have_last:
        parent.appendChild(para)
        parent.appendChild(doc.createTextNode("\n\n"))
        return len(parent.childNodes)
    else:
        # The node now at index start is the one that followed the run.
        nextnode = parent.childNodes[start]
        if nextnode.nodeType == TEXT:
            if nextnode.data and nextnode.data[0] != "\n":
                nextnode.data = "\n" + nextnode.data
        else:
            newnode = doc.createTextNode("\n")
            parent.insertBefore(newnode, nextnode)
            nextnode = newnode
            start = start + 1
        parent.insertBefore(para, nextnode)
        return start + 1
				714
				715
				716	def skip_leading_nodes(children, start=0):
				717	"""Return index into children of a node at which paragraph building should
				718	begin or a recursive call to fixup_paras_helper() should be made (for
				719	subsections, etc.).
				720
				721	When the return value >= len(children), we've built all the paras we can
				722	from this list of children.
				723	"""
				724	i = len(children)
				725	while i > start:
				726	# skip over leading comments and whitespace:
				727	child = children[start]
				728	nodeType = child.nodeType
				729	if nodeType == TEXT:
				730	data = child.data
				731	shortened = string.lstrip(data)
				732	if shortened:
				733	if data != shortened:
				734	# break into two nodes: whitespace and non-whitespace
				735	child.splitText(len(data) - len(shortened))
				736	return start + 1
				737	return start
				738	# all whitespace, just skip
				739	elif nodeType == ELEMENT:
				740	tagName = child.tagName
				741	if tagName in RECURSE_INTO_PARA_CONTAINERS:
				742	return start
				743	if tagName not in PARA_LEVEL_ELEMENTS + PARA_LEVEL_PRECEEDERS:
				744	return start
				745	start = start + 1
				746	return start
				747
				748
				749	def fixup_rfc_references(doc, fragment):
				750	for rfcnode in find_all_elements(fragment, "rfc"):
				751	rfcnode.appendChild(doc.createTextNode(
				752	"RFC " + rfcnode.getAttribute("num")))
				753
				754
				755	def fixup_signatures(doc, fragment):
				756	for child in fragment.childNodes:
				757	if child.nodeType == ELEMENT:
				758	args = child.getElementsByTagName("args")
				759	for arg in args:
				760	fixup_args(doc, arg)
				761	arg.normalize()
				762	args = child.getElementsByTagName("constructor-args")
				763	for arg in args:
				764	fixup_args(doc, arg)
				765	arg.normalize()
				766
				767
				768	def fixup_args(doc, arglist):
				769	for child in arglist.childNodes:
				770	if child.nodeName == "optional":
				771	# found it; fix and return
				772	arglist.insertBefore(doc.createTextNode("["), child)
				773	optkids = child.childNodes
				774	while optkids:
				775	k = optkids[0]
				776	child.removeChild(k)
				777	arglist.insertBefore(k, child)
				778	arglist.insertBefore(doc.createTextNode("]"), child)
				779	arglist.removeChild(child)
				780	return fixup_args(doc, arglist)
				781
				782
				783	def fixup_sectionauthors(doc, fragment):
				784	for sectauth in find_all_elements(fragment, "sectionauthor"):
				785	section = sectauth.parentNode
				786	section.removeChild(sectauth)
				787	set_tagName(sectauth, "author")
				788	sectauth.appendChild(doc.createTextNode(
				789	sectauth.getAttribute("name")))
				790	sectauth.removeAttribute("name")
				791	after = section.childNodes[2]
				792	title = section.childNodes[1]
				793	if title.nodeName != "title":
				794	after = section.childNodes[0]
				795	section.insertBefore(doc.createTextNode("\n "), after)
				796	section.insertBefore(sectauth, after)
				797
				798
				799	def fixup_verbatims(doc):
				800	for verbatim in find_all_elements(doc, "verbatim"):
				801	child = verbatim.childNodes[0]
				802	if child.nodeType == TEXT \
				803	and string.lstrip(child.data)[:3] == ">>>":
				804	set_tagName(verbatim, "interactive-session")
				805
				806
				807	def add_node_ids(fragment, counter=0):
				808	fragment.node_id = counter
				809	for node in fragment.childNodes:
				810	counter = counter + 1
				811	if node.nodeType == ELEMENT:
				812	counter = add_node_ids(node, counter)
				813	else:
				814	node.node_id = counter
				815	return counter + 1
				816
				817
# Tag names of the module index entries that fixup_refmodindexes() folds
# into an index="yes" attribute on the matching <module> element.
REFMODINDEX_ELEMENTS = ('refmodindex', 'refbimodindex',
                        'refexmodindex', 'refstmodindex')
				820
				821	def fixup_refmodindexes(fragment):
				822	# Locate <ref*modindex>...</> co-located with <module>...</>, and
				823	# remove the <ref*modindex>, replacing it with index=index on the
				824	# <module> element.
				825	nodes = find_all_elements_from_set(fragment, REFMODINDEX_ELEMENTS)
				826	d = {}
				827	for node in nodes:
				828	parent = node.parentNode
				829	d[parent.node_id] = parent
				830	del nodes
				831	map(fixup_refmodindexes_chunk, d.values())
				832
				833
				834	def fixup_refmodindexes_chunk(container):
				835	# node is probably a <para>; let's see how often it isn't:
				836	if container.tagName != PARA_ELEMENT:
				837	bwrite("--- fixup_refmodindexes_chunk(%s)\n" % container)
				838	module_entries = find_all_elements(container, "module")
				839	if not module_entries:
				840	return
				841	index_entries = find_all_elements_from_set(container, REFMODINDEX_ELEMENTS)
				842	removes = []
				843	for entry in index_entries:
				844	children = entry.childNodes
				845	if len(children) != 0:
				846	bwrite("--- unexpected number of children for %s node:\n"
				847	% entry.tagName)
				848	ewrite(entry.toxml() + "\n")
				849	continue
				850	found = 0
				851	module_name = entry.getAttribute("module")
				852	for node in module_entries:
				853	if len(node.childNodes) != 1:
				854	continue
				855	this_name = node.childNodes[0].data
				856	if this_name == module_name:
				857	found = 1
				858	node.setAttribute("index", "yes")
				859	if found:
				860	removes.append(entry)
				861	for node in removes:
				862	container.removeChild(node)
				863
				864
				865	def fixup_bifuncindexes(fragment):
				866	nodes = find_all_elements(fragment, 'bifuncindex')
				867	d = {}
				868	# make sure that each parent is only processed once:
				869	for node in nodes:
				870	parent = node.parentNode
				871	d[parent.node_id] = parent
				872	del nodes
				873	map(fixup_bifuncindexes_chunk, d.values())
				874
				875
				876	def fixup_bifuncindexes_chunk(container):
				877	removes = []
				878	entries = find_all_child_elements(container, "bifuncindex")
				879	function_entries = find_all_child_elements(container, "function")
				880	for entry in entries:
				881	function_name = entry.getAttribute("name")
				882	found = 0
				883	for func_entry in function_entries:
				884	t2 = func_entry.childNodes[0].data
				885	if t2[-2:] != "()":
				886	continue
				887	t2 = t2[:-2]
				888	if t2 == function_name:
				889	func_entry.setAttribute("index", "yes")
				890	func_entry.setAttribute("module", "__builtin__")
				891	if not found:
				892	found = 1
				893	removes.append(entry)
				894	for entry in removes:
				895	container.removeChild(entry)
				896
				897
				898	def join_adjacent_elements(container, gi):
				899	queue = [container]
				900	while queue:
				901	parent = queue.pop()
				902	i = 0
				903	children = parent.childNodes
				904	nchildren = len(children)
				905	while i < (nchildren - 1):
				906	child = children[i]
				907	if child.nodeName == gi:
				908	if children[i+1].nodeName == gi:
				909	ewrite("--- merging two <%s/> elements\n" % gi)
				910	child = children[i]
				911	nextchild = children[i+1]
				912	nextchildren = nextchild.childNodes
				913	while len(nextchildren):
				914	node = nextchildren[0]
				915	nextchild.removeChild(node)
				916	child.appendChild(node)
				917	parent.removeChild(nextchild)
				918	continue
				919	if child.nodeType == ELEMENT:
				920	queue.append(child)
				921	i = i + 1
				922
				923
				924	_token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
				925
				926	def write_esis(doc, ofp, knownempty):
				927	for node in doc.childNodes:
				928	nodeType = node.nodeType
				929	if nodeType == ELEMENT:
				930	gi = node.tagName
				931	if knownempty(gi):
				932	if node.hasChildNodes():
				933	raise ValueError, \
				934	"declared-empty node <%s> has children" % gi
				935	ofp.write("e\n")
				936	for k, value in node.attributes.items():
				937	if _token_rx.match(value):
				938	dtype = "TOKEN"
				939	else:
				940	dtype = "CDATA"
				941	ofp.write("A%s %s %s\n" % (k, dtype, esistools.encode(value)))
				942	ofp.write("(%s\n" % gi)
				943	write_esis(node, ofp, knownempty)
				944	ofp.write(")%s\n" % gi)
				945	elif nodeType == TEXT:
				946	ofp.write("-%s\n" % esistools.encode(node.data))
				947	elif nodeType == ENTITY_REFERENCE:
				948	ofp.write("&%s\n" % node.nodeName)
				949	else:
				950	raise RuntimeError, "unsupported node type: %s" % nodeType
				951
				952
				953	def convert(ifp, ofp):
				954	events = esistools.parse(ifp)
				955	toktype, doc = events.getEvent()
				956	fragment = doc.createDocumentFragment()
				957	events.expandNode(fragment)
				958
				959	normalize(fragment)
				960	simplify(doc, fragment)
				961	handle_labels(doc, fragment)
				962	handle_appendix(doc, fragment)
				963	fixup_trailing_whitespace(doc, {
				964	"abstract": "\n",
				965	"title": "",
				966	"chapter": "\n\n",
				967	"section": "\n\n",
				968	"subsection": "\n\n",
				969	"subsubsection": "\n\n",
				970	"paragraph": "\n\n",
				971	"subparagraph": "\n\n",
				972	})
				973	cleanup_root_text(doc)
				974	cleanup_trailing_parens(fragment, ["function", "method", "cfunction"])
				975	cleanup_synopses(doc, fragment)
				976	fixup_descriptors(doc, fragment)
				977	fixup_verbatims(fragment)
				978	normalize(fragment)
				979	fixup_paras(doc, fragment)
				980	fixup_sectionauthors(doc, fragment)
				981	fixup_table_structures(doc, fragment)
				982	fixup_rfc_references(doc, fragment)
				983	fixup_signatures(doc, fragment)
				984	add_node_ids(fragment)
				985	fixup_refmodindexes(fragment)
				986	fixup_bifuncindexes(fragment)
				987	# Take care of ugly hacks in the LaTeX markup to avoid LaTeX and
				988	# LaTeX2HTML screwing with GNU-style long options (the '--' problem).
				989	join_adjacent_elements(fragment, "option")
				990	#
				991	d = {}
				992	for gi in events.parser.get_empties():
				993	d[gi] = gi
				994	if d.has_key("author"):
				995	del d["author"]
				996	if d.has_key("rfc"):
				997	del d["rfc"]
				998	knownempty = d.has_key
				999	#
				1000	try:
				1001	write_esis(fragment, ofp, knownempty)
				1002	except IOError, (err, msg):
				1003	# Ignore EPIPE; it just means that whoever we're writing to stopped
				1004	# reading. The rest of the output would be ignored. All other errors
				1005	# should still be reported,
				1006	if err != errno.EPIPE:
				1007	raise
				1008
				1009
				1010	def main():
				1011	if len(sys.argv) == 1:
				1012	ifp = sys.stdin
				1013	ofp = sys.stdout
				1014	elif len(sys.argv) == 2:
				1015	ifp = open(sys.argv[1])
				1016	ofp = sys.stdout
				1017	elif len(sys.argv) == 3:
				1018	ifp = open(sys.argv[1])
				1019	import StringIO
				1020	ofp = StringIO.StringIO()
				1021	else:
				1022	usage()
				1023	sys.exit(2)
				1024	convert(ifp, ofp)
				1025	if len(sys.argv) == 3:
				1026	fp = open(sys.argv[2], "w")
				1027	fp.write(ofp.getvalue())
				1028	fp.close()
				1029	ofp.close()
				1030
				1031
				1032	if __name__ == "__main__":
				1033	main()