| Tor Norbye | 3a2425a | 2013-11-04 10:16:08 -0800 | [diff] [blame^] | 1 | # |
| 2 | # epytext.py: epydoc formatted docstring parsing |
| 3 | # Edward Loper |
| 4 | # |
| 5 | # Created [04/10/01 12:00 AM] |
| 6 | # $Id: epytext.py 1652 2007-09-26 04:45:34Z edloper $ |
| 7 | # |
| 8 | |
| 9 | """ |
| 10 | Parser for epytext strings. Epytext is a lightweight markup whose |
| 11 | primary intended application is Python documentation strings. This |
| 12 | parser converts Epytext strings to a simple DOM-like representation |
| 13 | (encoded as a tree of L{Element} objects and strings). Epytext |
| 14 | strings can contain the following X{structural blocks}: |
| 15 | |
| 16 | - X{epytext}: The top-level element of the DOM tree. |
| 17 | - X{para}: A paragraph of text. Paragraphs contain no newlines, |
| 18 | and all spaces are soft. |
| 19 | - X{section}: A section or subsection. |
| 20 | - X{field}: A tagged field. These fields provide information |
| 21 | about specific aspects of a Python object, such as the |
| 22 | description of a function's parameter, or the author of a |
| 23 | module. |
| 24 | - X{literalblock}: A block of literal text. This text should be |
| 25 | displayed as it would be displayed in plaintext. The |
| 26 | parser removes the appropriate amount of leading whitespace |
| 27 | from each line in the literal block. |
| 28 | - X{doctestblock}: A block containing sample python code, |
| 29 | formatted according to the specifications of the C{doctest} |
| 30 | module. |
| 31 | - X{ulist}: An unordered list. |
| 32 | - X{olist}: An ordered list. |
| 33 | - X{li}: A list item. This tag is used both for unordered list |
| 34 | items and for ordered list items. |
| 35 | |
| 36 | Additionally, the following X{inline regions} may be used within |
| 37 | C{para} blocks: |
| 38 | |
| 39 | - X{code}: Source code and identifiers. |
| 40 | - X{math}: Mathematical expressions. |
| 41 | - X{index}: A term which should be included in an index, if one |
| 42 | is generated. |
| 43 | - X{italic}: Italicized text. |
| 44 | - X{bold}: Bold-faced text. |
| 45 | - X{uri}: A Universal Resource Indicator (URI) or Universal |
| 46 | Resource Locator (URL) |
| 47 | - X{link}: A Python identifier which should be hyperlinked to |
| 48 | the named object's documentation, when possible. |
| 49 | |
The returned DOM tree will conform to the following Document Type
| 51 | Description:: |
| 52 | |
| 53 | <!ENTITY % colorized '(code | math | index | italic | |
| 54 | bold | uri | link | symbol)*'> |
| 55 | |
| 56 | <!ELEMENT epytext ((para | literalblock | doctestblock | |
| 57 | section | ulist | olist)*, fieldlist?)> |
| 58 | |
| 59 | <!ELEMENT para (#PCDATA | %colorized;)*> |
| 60 | |
| 61 | <!ELEMENT section (para | listblock | doctestblock | |
| 62 | section | ulist | olist)+> |
| 63 | |
| 64 | <!ELEMENT fieldlist (field+)> |
    <!ELEMENT field (tag, arg?, (para | listblock | doctestblock |
                     ulist | olist)+)>
| 67 | <!ELEMENT tag (#PCDATA)> |
| 68 | <!ELEMENT arg (#PCDATA)> |
| 69 | |
| 70 | <!ELEMENT literalblock (#PCDATA | %colorized;)*> |
| 71 | <!ELEMENT doctestblock (#PCDATA)> |
| 72 | |
| 73 | <!ELEMENT ulist (li+)> |
| 74 | <!ELEMENT olist (li+)> |
| 75 | <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> |
| 76 | <!ATTLIST li bullet NMTOKEN #IMPLIED> |
| 77 | <!ATTLIST olist start NMTOKEN #IMPLIED> |
| 78 | |
| 79 | <!ELEMENT uri (name, target)> |
| 80 | <!ELEMENT link (name, target)> |
| 81 | <!ELEMENT name (#PCDATA | %colorized;)*> |
| 82 | <!ELEMENT target (#PCDATA)> |
| 83 | |
| 84 | <!ELEMENT code (#PCDATA | %colorized;)*> |
| 85 | <!ELEMENT math (#PCDATA | %colorized;)*> |
| 86 | <!ELEMENT italic (#PCDATA | %colorized;)*> |
| 87 | <!ELEMENT bold (#PCDATA | %colorized;)*> |
| 88 | <!ELEMENT indexed (#PCDATA | %colorized;)> |
| 89 | <!ATTLIST code style CDATA #IMPLIED> |
| 90 | |
| 91 | <!ELEMENT symbol (#PCDATA)> |
| 92 | |
| 93 | @var SYMBOLS: A list of the of escape symbols that are supported |
| 94 | by epydoc. Currently the following symbols are supported: |
| 95 | <<<SYMBOLS>>> |
| 96 | """ |
| 97 | # Note: the symbol list is appended to the docstring automatically, |
| 98 | # below. |
| 99 | |
| 100 | __docformat__ = 'epytext en' |
| 101 | |
| 102 | # Code organization.. |
| 103 | # 1. parse() |
| 104 | # 2. tokenize() |
| 105 | # 3. colorize() |
| 106 | # 4. helpers |
| 107 | # 5. testing |
| 108 | |
| 109 | import re, string, types, sys, os.path |
| 110 | from epydoc.markup import * |
| 111 | from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex |
| 112 | from epydoc.markup.doctest import doctest_to_html, doctest_to_latex |
| 113 | |
| 114 | ################################################## |
| 115 | ## DOM-Like Encoding |
| 116 | ################################################## |
| 117 | |
class Element:
    """
    A very simple DOM-like representation for parsed epytext
    documents.  Each document is a tree whose interior nodes are
    C{Element}s and whose leaves are plain C{string}s.  Every node
    carries a I{tag} plus zero or more string-valued I{attributes}.
    """
    def __init__(self, tag, *children, **attribs):
        # A string tag indicating the type of this element
        # (e.g. 'para', 'section', 'epytext').
        self.tag = tag
        # The child nodes: a mix of strings and Elements.
        self.children = list(children)
        # Mapping from attribute name to attribute value.
        self.attribs = attribs

    def __str__(self):
        """
        Return a string representation of this element, using XML
        notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        parts = ['<%s' % self.tag]
        for name, value in self.attribs.items():
            parts.append(' %s=%r' % (name, value))
        parts.append('>')
        for child in self.children:
            parts.append(str(child))
        parts.append('</%s>' % self.tag)
        return ''.join(parts)

    def __repr__(self):
        pieces = [self.tag]
        pieces.extend(['%r' % child for child in self.children])
        pieces.extend(['%s=%r' % item for item in self.attribs.items()])
        return 'Element(%s)' % ', '.join(pieces)
| 155 | |
| 156 | ################################################## |
| 157 | ## Constants |
| 158 | ################################################## |
| 159 | |
# The possible heading underline characters, listed in order of
# heading depth (position 0 = top-level heading).
_HEADING_CHARS = "=-~"

# Escape codes.  These should be needed very rarely.  'lb'/'rb'
# expand to literal left/right curly braces.
_ESCAPES = {'lb':'{', 'rb': '}'}

# Symbols.  These can be generated via S{...} escapes.
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # HTML character entities
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Alternate (long) names
    'infinity', 'integral', 'product',
    '>=', '<=',
    ]
# Convert to a dictionary, for quick lookup (values are unused; the
# dict is effectively a set of valid symbol names).
_SYMBOLS = {}
for symbol in SYMBOLS: _SYMBOLS[symbol] = 1

# Add symbols to the docstring: build an epytext list of all symbols
# and substitute it for the <<<SYMBOLS>>> placeholder left in the
# module docstring above.
symblist = ' '
symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
                         for symbol in SYMBOLS])
__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
# Clean the loop/scratch variables out of the module namespace.
del symbol, symblist

# Tags for colorizing text.  Maps the one-letter inline-markup prefix
# (as in C{...}, B{...}) to the DOM tag it produces.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',       # A Python identifier that should be linked to
    'E': 'escape',     # escapes characters or creates symbols
    'S': 'symbol',
    'G': 'graph',
    }

# Which tags can use "link syntax" (e.g., U{Python<www.python.org>})?
_LINK_COLORIZING_TAGS = ['link', 'uri']
| 223 | |
| 224 | ################################################## |
| 225 | ## Structuring (Top Level) |
| 226 | ################################################## |
| 227 | |
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if an C{errors} list was supplied and a fatal
        error was encountered.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  If the caller did not supply one, we
    # collect into a private list and raise on the first fatal error.
    if errors is None:
        errors = []
        raise_on_error = True
    else:
        raise_on_error = False

    # Preprocess the string: normalize CRLF line endings and expand
    # tabs.  (str.expandtabs() replaces the deprecated
    # string.expandtabs(), which no longer exists in Python 3.)
    str = str.replace('\r\n', '\n')
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?  Fields must come last, so any
    # non-field element after a field is a structuring error.
    encountered_field = False

    # Create a document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # If Token has type PARA, colorize and add the new paragraph
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)

        # If Token has type HEADING, add the new section
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)

        # If Token has type LBLOCK, add the new literal block
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type DTBLOCK, add the new doctest block
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type BULLET, add the new list/list item/field
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            # Raise explicitly rather than `assert 0`, which is
            # silently stripped when running under -O.
            raise AssertionError('Unknown token type: %s' % token.tag)

        # Check if the DOM element we just added was a field..
        if stack[-1].tag == 'field':
            encountered_field = True
        elif encountered_field:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs use inline markup (G{...}) but are really block-level
    # elements; so "raise" any graphs we generated.  This is a bit of
    # a hack, but the alternative is to define a new markup for
    # block-level elements, which I'd rather not do.  (See sourceforge
    # bug #1673017.)
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it!
    if any(e.is_fatal() for e in errors):
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
| 336 | |
def _raise_graphs(tree, parent):
    """
    Hoist any 'graph' elements found inside C{tree} up to block level:
    a graph nested in a non-block element (e.g. a paragraph) is spliced
    into C{parent}'s child list, with the enclosing element split into
    a "left" and "right" copy around it.

    @param tree: The element to scan (recursively) for graph children.
    @param parent: The element whose C{children} list contains C{tree}.
    """
    # Recurse to children first, so graphs bubble up one level at a time.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    # These block-level containers may hold graphs directly; anything
    # else must be split so the graph can be hoisted into the parent.
    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # We found a graph: splice it into the parent.
                # NOTE(review): after this splice, C{tree} itself is no
                # longer in C{parent.children} (it was replaced by two
                # new Elements), so the C{index(tree)} call below looks
                # like it would raise ValueError for a *second* graph in
                # the same element -- confirm with a two-graph paragraph.
                parent_index = parent.children.index(tree)
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                child_index = 0
                parent_index += 2
            else:
                child_index += 1
| 362 | |
| 363 | def _pop_completed_blocks(token, stack, indent_stack): |
| 364 | """ |
| 365 | Pop any completed blocks off the stack. This includes any |
| 366 | blocks that we have dedented past, as well as any list item |
| 367 | blocks that we've dedented to. The top element on the stack |
| 368 | should only be a list if we're about to start a new list |
| 369 | item (i.e., if the next token is a bullet). |
| 370 | """ |
| 371 | indent = token.indent |
| 372 | if indent != None: |
| 373 | while (len(stack) > 2): |
| 374 | pop = 0 |
| 375 | |
| 376 | # Dedent past a block |
| 377 | if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1 |
| 378 | elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1 |
| 379 | |
| 380 | # Dedent to a list item, if it is follwed by another list |
| 381 | # item with the same indentation. |
| 382 | elif (token.tag == 'bullet' and indent==indent_stack[-2] and |
| 383 | stack[-1].tag in ('li', 'field')): pop=1 |
| 384 | |
| 385 | # End of a list (no more list items available) |
| 386 | elif (stack[-1].tag in ('ulist', 'olist') and |
| 387 | (token.tag != 'bullet' or token.contents[-1] == ':')): |
| 388 | pop=1 |
| 389 | |
| 390 | # Pop the block, if it's complete. Otherwise, we're done. |
| 391 | if pop == 0: return |
| 392 | stack.pop() |
| 393 | indent_stack.pop() |
| 394 | |
def _add_para(doc, para_token, stack, indent_stack, errors):
    """Colorize the given paragraph, and add it to the DOM tree.

    @param doc: The top-level 'epytext' DOM element (passed through
        to the colorizer).
    @param para_token: The paragraph token to add.
    @param stack: The stack of open DOM elements; the paragraph is
        appended to the innermost open block's children.
    @param indent_stack: The parallel stack of indentations; an
        unknown (C{None}) top entry is resolved to this paragraph's
        indentation.
    @param errors: A list to which any L{StructuringError}s are
        appended.
    """
    # Check indentation, and update the parent's indentation
    # when appropriate: the first paragraph defines an unknown indent.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        if para_token.inline:
            para.attribs['inline'] = True
        stack[-1].children.append(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
| 410 | |
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading.

    @param doc: The top-level 'epytext' DOM element (passed through
        to the colorizer).
    @param heading_token: The heading token; its C{level} gives the
        section's nesting depth.
    @param stack: The stack of open DOM elements (modified in place).
    @param indent_stack: The parallel stack of indentations
        (modified in place).
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # An unknown parent indentation is defined by this heading; a
    # known one must match it.
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: headings may only nest inside other sections...
    for elt in stack[2:]:
        if elt.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    # ...and the underline character must match the current depth.
    if (heading_token.level + 2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of sections so we're at the
    # correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.  The new section
    # becomes the innermost open block, with unknown indentation.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
| 443 | |
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.

    @param doc: The top-level 'epytext' DOM element.
    @param bullet_token: The bullet token ('-', 'N.', or '@tag:')
        that starts the new list item or field.
    @param stack: The stack of open DOM elements (modified in place).
    @param indent_stack: The parallel stack of indentations
        (modified in place).
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # Determine what type of bullet it is from its final character.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = False
    if stack[-1].tag != list_type:
        newlist = True
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered-list bullet continues the current list only if
        # its number follows directly from the previous item's number.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = True

    # Create the new list.
    if newlist:
        # BUG FIX: was "stack[-1].tag is 'fieldlist'" -- a string
        # *identity* test, which is implementation-dependent (and a
        # SyntaxWarning on modern Python).  Equality is intended.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for elt in stack[2:]:
                if elt.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            # Record a non-default starting number (e.g. "4.").
            # BUG FIX: the original compared the *list* of bullet
            # components against the string '1'; that is always
            # unequal, so "start" was recorded even for lists that
            # begin at 1.  Compare the last numeric component instead.
            start = bullet_token.contents.split('.')[:-1]
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.  The new list item becomes the innermost open
    # block, with (as yet) unknown indentation.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
| 541 | |
| 542 | ################################################## |
| 543 | ## Tokenization |
| 544 | ################################################## |
| 545 | |
class Token:
    """
    An intermediate data structure used while constructing the
    structuring DOM tree for a formatted docstring.  There are five
    types of C{Token}:

      - Paragraphs
      - Literal blocks
      - Doctest blocks
      - Headings
      - Bullets

    The text contained in each C{Token} is stored in the C{contents}
    variable, in normalized form: paragraphs are collapsed into a
    single line of text, with newline/indentation replaced by single
    spaces; literal blocks and doctest blocks have had the
    appropriate amount of leading whitespace removed from each line.

    Each C{Token} also carries an indentation level (C{indent}),
    which the structuring procedure uses to assemble hierarchical
    blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are
        C{Token.PARA} (paragraph), C{Token.LBLOCK} (literal block),
        C{Token.DTBLOCK} (doctest block), C{Token.HEADING}, and
        C{Token.BULLET}.

    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.

    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.

    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in number
        of leading spaces).  A value of C{None} indicates an unknown
        indentation; this is used for list items and fields that
        begin with one-line paragraphs.

    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None} otherwise.  Valid heading levels are 0, 1,
        and 2.

    @type inline: C{bool}
    @ivar inline: If true, the element is an inline-level element,
        comparable to an HTML C{<span>} tag.  Else, it is block
        level, comparable to an HTML C{<div>}.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces); C{None} for unknown.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None} otherwise.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline, like a C{<span>}?
        @type inline: C{bool}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level
        self.inline = inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, of the
            form C{<Token: para at line 12>}.
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @param doc: The top-level DOM document (not referenced here;
            kept for interface compatibility).
        @return: a DOM representation of this C{Token}: an L{Element}
            bearing this token's tag, whose only child is the token's
            contents string.
        @rtype: L{Element}
        """
        node = Element(self.tag)
        node.children.append(self.contents)
        return node
| 663 | |
# Construct regular expressions for recognizing bullets.  These are
# global so they don't have to be reconstructed each time we tokenize
# a docstring.
# An unordered-list bullet: a dash followed by whitespace or EOL.
_ULIST_BULLET = '[-]( +|$)'
# An ordered-list bullet: one or more dotted numbers ("1.", "1.2.")
# followed by whitespace or EOL.
_OLIST_BULLET = '(\d+[.])+( +|$)'
# A field bullet: "@tag:" or "@tag arg:".
_FIELD_BULLET = '@\w+( [^{}:\n]+)?:'
_BULLET_RE = re.compile(_ULIST_BULLET + '|' +
                        _OLIST_BULLET + '|' +
                        _FIELD_BULLET)
_LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
_FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
# The component pattern strings are no longer needed once compiled.
del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
| 676 | |
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # Dedenting past block_indent is an error, but such lines are
    # still included; track the minimum indentation actually seen so
    # the common leading whitespace can be stripped below.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        stripped = cur.lstrip()
        indent = len(cur) - len(stripped)

        # A blank line ends the doctest block.
        if not stripped:
            break

        # A dedent past block_indent is an error.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Strip the common leading whitespace, emit the token, and return
    # the line number just past the block.
    body = '\n'.join([ln[min_indent:] for ln in lines[start:linenum]])
    tokens.append(Token(Token.DTBLOCK, start, body, block_indent))
    return linenum
| 731 | |
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        cur = lines[linenum]
        stripped = cur.lstrip()
        indent = len(cur) - len(stripped)

        # A non-blank line dedented to (or past) block_indent ends the
        # literal block; blank lines never terminate it.
        if stripped and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Drop one extra column of indentation beyond the block marker,
    # trim leading/trailing blank lines, and emit the token.
    body = '\n'.join([ln[block_indent+1:] for ln in lines[start:linenum]])
    body = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', body)
    tokens.append(Token(Token.LBLOCK, start, body, block_indent))
    return linenum
| 778 | |
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} gives the indentation of the list
    item.  Any errors encountered while tokenizing are appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this list item
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    end = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip().endswith('::')

    # Extract the bullet itself; the paragraph text begins right
    # after it.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while end < len(lines):
        text = lines[end]
        indent = len(text) - len(text.lstrip())

        # A "::" marker on the previous line ends the paragraph.
        if doublecolon:
            break
        if text.rstrip().endswith('::'):
            doublecolon = True

        # The token also ends at a blank line...
        if indent == len(text):
            break
        # ...at a dedent past the bullet's indentation...
        if indent < bullet_indent:
            break
        # ...or at the start of another bullet.
        if _BULLET_RE.match(text, indent):
            break

        # The second line fixes the paragraph indentation; any later
        # change in indentation ends the token.
        if para_indent is None:
            para_indent = indent
        if indent != para_indent:
            break

        end += 1

    # Emit the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Emit the first-paragraph token (only if it is nonempty).
    pcontents = ' '.join([lines[start][para_start:].strip()] +
                         [ln.strip() for ln in lines[start+1:end]]).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return end
| 855 | |
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} for the paragraph (or heading) starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    gives the indentation of the paragraph.  Any errors encountered
    while tokenizing are appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    end = start + 1
    doublecolon = 0
    while end < len(lines):
        text = lines[end]
        indent = len(text) - len(text.lstrip())

        # A "::" marker on the previous line ends the paragraph.
        if doublecolon:
            break
        if text.rstrip()[-2:] == '::':
            doublecolon = 1

        # Blank lines, indentation changes, and list bullets all end
        # the paragraph.
        if indent == len(text):
            break
        if indent != para_indent:
            break
        if _BULLET_RE.match(text, indent):
            break

        # Warn (non-fatally) about lines that look like field items.
        if text[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, end, is_fatal=0))

        end += 1

    contents = [ln.strip() for ln in lines[start:end]]

    # A two-line token whose second line is a run of one heading
    # character, roughly matching the first line's length, is a
    # heading.
    if (len(contents) >= 2 and
        contents[1][0] in _HEADING_CHARS and
        abs(len(contents[0]) - len(contents[1])) <= 5 and
        contents[1] == contents[1][0] * len(contents[1])):
        if len(contents[0]) != len(contents[1]):
            # Warn, then fall through and treat it as a paragraph.
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            # Skip past the heading text and its underline.
            return start + 2

    # Otherwise emit an ordinary paragraph token.
    tokens.append(Token(Token.PARA, start, ' '.join(contents), para_indent))
    return end
| 940 | |
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list to which any errors generated during
        tokenizing will be appended.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Walk the lines, dispatching on what kind of block each
    # non-blank line begins; each helper consumes its block and
    # returns the next unprocessed line number.
    linenum = 0
    while linenum < len(lines):
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        if indent == len(line):
            # Skip blank lines.
            linenum += 1
            continue

        if line.startswith('>>> ', indent):
            # ">>> " introduces a doctest block.
            linenum = _tokenize_doctest(lines, linenum, indent,
                                        tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # A bullet introduces a list item (or field).
            linenum = _tokenize_listart(lines, linenum, indent,
                                        tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Warn (non-fatally) about lines that look like field
            # items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, linenum, is_fatal=0))

            # Anything else is a paragraph or a heading.
            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)

        # A paragraph token ending in '::' initiates a literal block;
        # drop one of the two colons before consuming the block.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            linenum = _tokenize_literal(lines, linenum, indent,
                                        tokens, errors)

    return tokens
| 996 | |
| 997 | |
| 998 | ################################################## |
| 999 | ## Inline markup ("colorizing") |
| 1000 | ################################################## |
| 1001 | |
# Assorted regular expressions used for colorizing.  (Raw strings,
# so regex escapes like \s are not interpreted as string escapes.)
_BRACE_RE = re.compile(r'{|}')
# Matches "text <target>", where the target may carry an optional
# "URI:" or "URL:" prefix; groups are (text, target).
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
| 1005 | |
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The DOM document being constructed.  It is not used
        directly here, but is passed through to the graph and link
        colorizing helpers.
    @param token: The token whose contents should be colorized; used
        both for its C{contents} and for error reporting.
    @type token: L{Token}

    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of L{ParseError}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0  # (not used below)

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceeded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # Tagged region: the capital letter belongs to the
                # markup, so exclude it from the preceding text.
                if (end-1) > start:
                    stack[-1].children.append(str[start:end-1])
                if str[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(Element(tag))
            else:
                if end > start:
                    stack[-1].children.append(str[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            # Attach the new element to its parent immediately, so it
            # is in place even if the close brace never arrives.
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(str[start:end])

            # Special handling for symbols: an S{...} region must
            # contain exactly one string, naming a known symbol.
            if stack[-1].tag == 'symbol':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: an E{...} region
            # must contain exactly one string -- either a known escape
            # name or a single character.
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: replace
            # the element with its children wrapped in literal braces.
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs:
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].children.append(str[start:])

    # Any elements still on the stack were never closed.
    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
| 1145 | |
# The graph types that G{...} markup may name (see _colorize_graph).
GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
| 1147 | |
def _colorize_graph(doc, graph, token, end, errors):
    """
    Validate and normalize the children of a C{graph} element.  Eg::
        G{classtree}
        G{classtree x, y, z}
        G{importgraph}

    On success, C{graph.children} is replaced by the graph type
    followed by its arguments.  On failure, a L{ColorizingError} is
    appended to C{errors} and the children become C{['none', '']}.

    @param doc: The DOM document being constructed (not used here;
        passed for signature uniformity with the other colorizers).
    @param graph: The C{graph} element to process.
    @param token: The token being colorized (for error reporting).
    @param end: The index of the close brace (for error reporting).
    @param errors: A list to which any new errors are appended.
    """
    bad_graph_spec = False

    children = graph.children[:]
    graph.children = []

    if len(children) != 1 or not isinstance(children[0], basestring):
        bad_graph_spec = "Bad graph specification"
    else:
        pieces = children[0].split(None, 1)
        graphtype = pieces[0].replace(':','').strip().lower()
        if graphtype in GRAPH_TYPES:
            if len(pieces) == 2:
                # Anchor the pattern with \Z so the *entire* arg list
                # must match.  Without the anchor, the starred group
                # can match an empty prefix, so re.match always
                # succeeded and the "Bad graph arg list" branch was
                # unreachable.
                if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*\Z', pieces[1]):
                    args = pieces[1].replace(',', ' ').replace(':','').split()
                else:
                    bad_graph_spec = "Bad graph arg list"
            else:
                args = []
        else:
            bad_graph_spec = ("Bad graph type %s -- use one of %s" %
                              (pieces[0], ', '.join(GRAPH_TYPES)))

    if bad_graph_spec:
        errors.append(ColorizingError(bad_graph_spec, token, end))
        graph.children.append('none')
        graph.children.append('')
        return

    graph.children.append(graphtype)
    for arg in args:
        graph.children.append(arg)
| 1186 | |
def _colorize_link(doc, link, token, end, errors):
    """
    Normalize a link-type element (e.g. C{link} or C{uri}) so that
    its children become a C{name} element followed by a C{target}
    element.  If no valid target can be determined, an error is
    appended to C{errors} and the element is left unchanged.
    """
    variables = link.children[:]

    # The target must come from trailing text; anything else is bad.
    if len(variables) == 0 or not isinstance(variables[-1], basestring):
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    match2 = _TARGET_RE.match(variables[-1])
    if match2:
        # Explicit target: "text <target>".
        (text, target) = match2.groups()
        variables[-1] = text
    elif len(variables) == 1:
        # Implicit target: the link text itself.
        target = variables[0]
    else:
        errors.append(ColorizingError("Bad %s target." % link.tag,
                                      token, end))
        return

    # Construct the name element.
    name_elt = Element('name', *variables)

    # Clean up the target.  For URIs, assume http or mailto if they
    # don't specify (no relative urls)
    target = re.sub(r'\s', '', target)
    if link.tag == 'uri':
        if not re.match(r'\w+:', target):
            if re.match(r'\w+@(\w+)(\.\w+)*', target):
                target = 'mailto:' + target
            else:
                target = 'http://'+target
    elif link.tag == 'link':
        # Remove arg lists for functions (e.g., L{_colorize_link()})
        target = re.sub(r'\(.*\)$', '', target)
        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
            errors.append(ColorizingError("Bad link target.", token, end))
            return

    # Construct the target element, and install both children.
    target_elt = Element('target', target)
    link.children = [name_elt, target_elt]
| 1234 | |
| 1235 | ################################################## |
| 1236 | ## Formatters |
| 1237 | ################################################## |
| 1238 | |
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    Internally, literal braces in text are temporarily encoded as the
    placeholder characters C{chr(0)}/C{chr(1)}, and C{chr(2)} marks
    the start of a literal block; the placeholders are rewritten to
    the proper escapes by the enclosing construct.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        # Encode literal braces as the \x00/\x01 placeholders.
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The
    # '\2' in the pattern is chr(2), the marker emitted before each
    # literalblock below.
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, then escape anything that would
        # otherwise be re-parsed as markup in that position.
        str = wordwrap(childstr, indent)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        # Doctest text is literal, so placeholders revert to braces.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [' '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # Lead with chr(2) so the parent can restore the '::' (see
        # the re.sub above).
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: map the tag back to its single-letter form.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| 1325 | |
# Plaintext renderings for symbols that have a reasonable ASCII
# form; symbols not listed here are rendered as their code (see
# to_plaintext).
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }
| 1329 | |
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Plain text nodes need no escaping in plaintext output.
    if isinstance(tree, basestring): return tree

    if tree.tag == 'section': seclevel += 1

    # Figure out the child indent level.
    if tree.tag == 'epytext': cindent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        cindent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    if tree.tag == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tag == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'fieldlist':
        return childstr
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        # (tag is assigned but variables[0] is used directly below.)
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tag == 'link':
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tag in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in variables:
        #    if child.count('\n') > 2: return childstr
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
    elif tree.tag == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Assume that anything else can be passed through.
        return childstr
| 1410 | |
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    Like C{to_epytext}, it uses C{chr(0)}/C{chr(1)} as temporary
    placeholders for literal braces, and C{chr(2)} to mark literal
    blocks.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, basestring):
        # Encode literal braces as the \x00/\x01 placeholders.
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The
    # '\2' in the pattern is chr(2), the marker emitted before each
    # literalblock below.
    childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, escape markup-significant characters,
        # then annotate the left margin with "P>|".
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        # Doctest text is literal, so placeholders revert to braces.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # Lead with chr(2) so the parent can restore the '::'.
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: map the tag back to its single-letter form.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
| 1500 | |
| 1501 | ################################################## |
| 1502 | ## Top-Level Wrapper function |
| 1503 | ################################################## |
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Initialize warnings before the try: if parse() raises, the
    # assignment inside the try never runs, and the warnings.sort()
    # call below would die with a NameError -- masking the original
    # exception that the bare "raise" at the end is meant to re-raise.
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Deliberately broad: remember that parsing blew up, report
        # whatever was collected so far, then re-raise below.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
| 1555 | |
| 1556 | ################################################## |
| 1557 | ## Parse Errors |
| 1558 | ################################################## |
| 1559 | |
class TokenizationError(ParseError):
    """
    An error raised while tokenizing a formatted documentation
    string.
    """
| 1565 | |
class StructuringError(ParseError):
    """
    An error raised while structuring a formatted documentation
    string.
    """
| 1571 | |
class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    # Maximum number of characters of token context shown on each
    # side of the error position; anything longer is elided.
    CONTEXT_RANGE = 20
    def descr(self):
        """
        Return the error description followed by a snippet of the
        token's contents, with a caret marking the error position.
        """
        contents = self.token.contents
        pos = self.charnum
        window = self.CONTEXT_RANGE
        # Context to the left of the error position.
        if pos <= window:
            left = contents[0:pos]
        else:
            left = '...' + contents[pos-window:pos]
        # Context at and after the error position.
        if len(contents) - pos <= window:
            right = contents[pos:]
        else:
            right = contents[pos:pos+window] + '...'
        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
| 1605 | |
| 1606 | ################################################## |
| 1607 | ## Convenience parsers |
| 1608 | ################################################## |
| 1609 | |
def parse_as_literal(str):
    """
    Return a DOM document matching the epytext DTD whose only content
    is a single literal block holding the given string.  This is
    typically used as a fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: C{Element}
    """
    literal = Element('literalblock', str)
    return Element('epytext', literal)
| 1626 | |
def parse_as_para(str):
    """
    Return a DOM document matching the epytext DTD whose only content
    is a single paragraph holding the given string.  This can be used
    to wrap some forms of automatically generated information (such
    as type names) in paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: C{Element}
    """
    para = Element('para', str)
    return Element('epytext', para)
| 1642 | |
| 1643 | ################################################################# |
| 1644 | ## SUPPORT FOR EPYDOC |
| 1645 | ################################################################# |
| 1646 | |
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext, and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree, **options)
| 1661 | |
class ParsedEpytextDocstring(ParsedDocstring):
    """
    A parsed docstring whose contents are stored as an epytext DOM
    tree (a tree of L{Element} objects and strings), with renderers
    for HTML, LaTeX, and plaintext output.
    """
    # Map from an epytext symbol name (the NAME in E{NAME}) to the
    # character used to render it in HTML output.
    SYMBOL_TO_HTML = {
        # Symbols
        '<-': '←', '->': '→', '^': '↑', 'v': '↓',

        # Greek letters
        'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
        'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',
        'eta': 'η', 'theta': 'θ', 'iota': 'ι',
        'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',
        'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',
        'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',
        'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',
        'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
        'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
        'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',
        'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι',
        'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',
        'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',
        'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',
        'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',
        'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',

        # HTML character entities
        'larr': '←', 'rarr': '→', 'uarr': '↑',
        'darr': '↓', 'harr': '↔', 'crarr': '↵',
        'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
        'dArr': '⇓', 'hArr': '⇔',
        'copy': '©', 'times': '×', 'forall': '∀',
        'exist': '∃', 'part': '∂',
        'empty': '∅', 'isin': '∈', 'notin': '∉',
        'ni': '∋', 'prod': '∏', 'sum': '∑',
        'prop': '∝', 'infin': '∞', 'ang': '∠',
        'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
        'int': '∫', 'there4': '∴', 'sim': '∼',
        'cong': '≅', 'asymp': '≈', 'ne': '≠',
        'equiv': '≡', 'le': '≤', 'ge': '≥',
        'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
        'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
        'otimes': '⊗', 'perp': '⊥',

        # Alternate (long) names
        'infinity': '∞', 'integral': '∫', 'product': '∏',
        '<=': '≤', '>=': '≥',
        }
| 1707 | |
| 1708 | SYMBOL_TO_LATEX = { |
| 1709 | # Symbols |
| 1710 | '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', |
| 1711 | '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', |
| 1712 | |
| 1713 | # Greek letters (use lower case when upcase not available) |
| 1714 | |
| 1715 | 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': |
| 1716 | r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': |
| 1717 | r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', |
| 1718 | 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': |
| 1719 | r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', |
| 1720 | 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': |
| 1721 | r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': |
| 1722 | r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', |
| 1723 | 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': |
| 1724 | r'\(\omega\)', |
| 1725 | |
| 1726 | 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': |
| 1727 | r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': |
| 1728 | r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', |
| 1729 | 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': |
| 1730 | r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', |
| 1731 | 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': |
| 1732 | r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': |
| 1733 | r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', |
| 1734 | 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': |
| 1735 | r'\(\Omega\)', |
| 1736 | |
| 1737 | # HTML character entities |
| 1738 | 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': |
| 1739 | r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': |
| 1740 | r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', |
| 1741 | 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': |
| 1742 | r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': |
| 1743 | r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', |
| 1744 | 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': |
| 1745 | r'\(\exists\)', 'part': r'\(\partial\)', 'empty': |
| 1746 | r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', |
| 1747 | 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)', |
| 1748 | 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': |
| 1749 | r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': |
| 1750 | r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': |
| 1751 | r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', |
| 1752 | 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': |
| 1753 | r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': |
| 1754 | r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', |
| 1755 | 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': |
| 1756 | r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', |
| 1757 | |
| 1758 | # Alternate (long) names |
| 1759 | 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': |
| 1760 | r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', |
| 1761 | } |
| 1762 | |
| 1763 | def __init__(self, dom_tree, **options): |
| 1764 | self._tree = dom_tree |
| 1765 | # Caching: |
| 1766 | self._html = self._latex = self._plaintext = None |
| 1767 | self._terms = None |
| 1768 | # inline option -- mark top-level children as inline. |
| 1769 | if options.get('inline') and self._tree is not None: |
| 1770 | for elt in self._tree.children: |
| 1771 | elt.attribs['inline'] = True |
| 1772 | |
| 1773 | def __str__(self): |
| 1774 | return str(self._tree) |
| 1775 | |
| 1776 | def to_html(self, docstring_linker, directory=None, docindex=None, |
| 1777 | context=None, **options): |
| 1778 | if self._html is not None: return self._html |
| 1779 | if self._tree is None: return '' |
| 1780 | indent = options.get('indent', 0) |
| 1781 | self._html = self._to_html(self._tree, docstring_linker, directory, |
| 1782 | docindex, context, indent) |
| 1783 | return self._html |
| 1784 | |
| 1785 | def to_latex(self, docstring_linker, **options): |
| 1786 | if self._latex is not None: return self._latex |
| 1787 | if self._tree is None: return '' |
| 1788 | indent = options.get('indent', 0) |
| 1789 | self._hyperref = options.get('hyperref', 1) |
| 1790 | self._latex = self._to_latex(self._tree, docstring_linker, indent) |
| 1791 | return self._latex |
| 1792 | |
| 1793 | def to_plaintext(self, docstring_linker, **options): |
| 1794 | # [XX] don't cache -- different options might be used!! |
| 1795 | #if self._plaintext is not None: return self._plaintext |
| 1796 | if self._tree is None: return '' |
| 1797 | if 'indent' in options: |
| 1798 | self._plaintext = to_plaintext(self._tree, |
| 1799 | indent=options['indent']) |
| 1800 | else: |
| 1801 | self._plaintext = to_plaintext(self._tree) |
| 1802 | return self._plaintext |
| 1803 | |
| 1804 | def _index_term_key(self, tree): |
| 1805 | str = to_plaintext(tree) |
| 1806 | str = re.sub(r'\s\s+', '-', str) |
| 1807 | return "index-"+re.sub("[^a-zA-Z0-9]", "_", str) |
| 1808 | |
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """
        Recursively render the DOM (sub)tree C{tree} as HTML,
        dispatching on the element's tag.  Plain strings are
        HTML-escaped; unknown tags raise C{ValueError}.
        """
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        # Children are rendered with indent+2, so the root starts at -2
        # to cancel that out; headings below also subtract 2.
        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tag == 'para':
            # Inline paragraphs are emitted bare; others get <p> tags.
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            # variables[0] is the link text, variables[1] the target.
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term in its own document and let the linker
            # decide how to render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            # Heading level follows the section nesting depth.
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            # Field lists are extracted by split_fields() before
            # rendering, so reaching one here is a logic error.
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            # Structural containers contribute only their children.
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
| 1886 | |
| 1887 | #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph'] |
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        """
        Construct the dot graph requested by a G{...} markup element.
        Returns a graph object from epydoc.docwriter.dotgraph, or
        C{None} if the graph cannot be built (a warning is logged).
        Supported graph types: classtree, packagetree, importgraph,
        callgraph.
        """
        # Imports are deferred to avoid the dotgraph/apidoc dependency
        # unless a graph is actually requested.
        # Generate the graph
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            # Explicit args name the base classes; otherwise fall back
            # to the containing class, if there is one.
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            # Explicit args name the root packages; otherwise fall back
            # to the containing module, if there is one.
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            # The import graph always covers every documented module.
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)
| 1933 | |
| 1934 | |
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        """
        Recursively render the DOM (sub)tree C{tree} as LaTeX,
        dispatching on the element's tag.  Plain strings are
        LaTeX-escaped.  Unlike L{_to_html}, unknown tags are passed
        through as their rendered children.
        """
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Figure out the child indent level.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI ')
            # self._hyperref is set by to_latex() before rendering.
            if self._hyperref:
                # ~ and # should not be escaped in the URI.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                # Without hyperref, fall back to a plain footnote.
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term in its own document and let the linker
            # decide how to render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            # Field lists are normally extracted by split_fields();
            # any leftovers are explicitly omitted from LaTeX output.
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Graphs are not supported in the LaTeX output.
            return '(GRAPH)'
            #raise ValueError, 'graph not implemented yet for latex'
        else:
            # Assume that anything else can be passed through.
            return childstr
| 2010 | |
    # Matches the first sentence of a paragraph: the shortest prefix
    # ending in a period that is followed by whitespace or the end of
    # the string.
    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    def summary(self):
        """
        Return a C{(summary, long_docs)} pair, where C{summary} is a
        L{ParsedEpytextDocstring} holding the first sentence of this
        docstring, and C{long_docs} is true if the docstring contains
        further content beyond that sentence.
        """
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

        # Find the first paragraph.
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            # Descend into containers; skip anything else.
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].children
            else:
                variables = variables[1:]

        # Special case: if the docstring contains a single literal block,
        # then try extracting the summary from it.
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            str = re.split(r'\n\s*(\n|$).*',
                           tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(str)

        # If we didn't find a paragraph, return an empty epytext.
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

        # Is there anything else, excluding tags, after the first variable?
        long_docs = False
        for var in variables[1:]:
            # Field lists don't count as additional documentation.
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

        # Extract the first sentence.
        parachildren = variables[0].children
        para = Element('para', inline=True)
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = self._SUMMARY_RE.match(parachild)
                if m:
                    para.children.append(m.group(1))
                    # Anything after this child also counts as "long".
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs
| 2064 | |
    def split_fields(self, errors=None):
        """
        Split this docstring into a C{(body, fields)} pair, where
        C{body} is a L{ParsedEpytextDocstring} without the trailing
        field list (or C{None} if there is no description), and
        C{fields} is a list of L{Field} objects built from that field
        list.  C{self} is not modified; a shallow copy of the tree is
        split instead.

        @param errors: Unused here; kept for interface compatibility.
        """
        if self._tree is None: return (self, ())
        # Work on a copy so the original tree stays intact.
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            # Detach the trailing field list from the copy.
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # Get the tag
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # Get the argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Process the field.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # Save the remaining docstring as the description..
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
| 2098 | |
| 2099 | |
| 2100 | def index_terms(self): |
| 2101 | if self._terms is None: |
| 2102 | self._terms = [] |
| 2103 | self._index_terms(self._tree, self._terms) |
| 2104 | return self._terms |
| 2105 | |
    def _index_terms(self, tree, terms):
        """
        Recursively collect every X{indexed} element in C{tree},
        appending each one (wrapped as a L{ParsedEpytextDocstring})
        to the C{terms} list.
        """
        if tree is None or isinstance(tree, basestring):
            return

        if tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

        # Look for index items in child nodes.
        for child in tree.children:
            self._index_terms(child, terms)