Blame - python/helpers/epydoc/docstringparser.py - platform/tools/idea

blob: b609bc9d246befcd29b690e9ee2482c63a7f722a [file] [log] [blame]

Tor Norbye	3a2425a	2013-11-04 10:16:08 -0800	[diff] [blame^]	1	# epydoc -- Docstring processing
				2	#
				3	# Copyright (C) 2005 Edward Loper
				4	# Author: Edward Loper <edloper@loper.org>
				5	# URL: <http://epydoc.sf.net>
				6	#
				7	# $Id: docstringparser.py 1689 2008-01-30 17:01:02Z edloper $
				8
				9	"""
				10	Parse docstrings and handle any fields they define, such as C{@type}
				11	and C{@author}. Fields are used to describe specific information
				12	about an object. There are two classes of fields: X{simple fields}
				13	and X{special fields}.
				14
				15	Simple fields are fields that get stored directly in an C{APIDoc}'s
				16	metadata dictionary, without any special processing. The set of
				17	simple fields is defined by the list L{STANDARD_FIELDS}, whose
				18	elements are L{DocstringField}s.
				19
				20	Special fields are fields that perform some sort of processing on the
				21	C{APIDoc}, or add information to attributes other than the metadata
				22	dictionary. Special fields are handled by field handler
				23	functions, which are registered using L{register_field_handler}.
				24	"""
				25	__docformat__ = 'epytext en'
				26
				27
				28	######################################################################
				29	## Imports
				30	######################################################################
				31
				32	import re, sys
				33	from epydoc import markup
				34	from epydoc.markup import epytext
				35	from epydoc.apidoc import *
				36	from epydoc.docintrospecter import introspect_docstring_lineno
				37	from epydoc.util import py_src_filename
				38	from epydoc import log
				39	import epydoc.docparser
				40	import __builtin__, exceptions
				41
				42	######################################################################
				43	# Docstring Fields
				44	######################################################################
				45
				46	class DocstringField:
				47	"""
				48	A simple docstring field, which can be used to describe specific
				49	information about an object, such as its author or its version.
				50	Simple docstring fields are fields that take no arguments, and
				51	are displayed as simple sections.
				52
				53	@ivar tags: The set of tags that can be used to identify this
				54	field.
				55	@ivar singular: The label that should be used to identify this
				56	field in the output, if the field contains one value.
				57	@ivar plural: The label that should be used to identify this
				58	field in the output, if the field contains multiple values.
				59	@ivar short: If true, then multiple values should be combined
				60	into a single comma-delimited list. If false, then
				61	multiple values should be listed separately in a bulleted
				62	list.
				63	@ivar multivalue: If true, then multiple values may be given
				64	for this field; if false, then this field can only take a
				65	single value, and a warning should be issued if it is
				66	redefined.
				67	@ivar takes_arg: If true, then this field expects an argument;
				68	and a separate field section will be constructed for each
				69	argument value. The label (and plural label) should include
				70	a '%s' to mark where the argument's string rep should be
				71	added.
				72	"""
				73	def __init__(self, tags, label, plural=None,
				74	short=0, multivalue=1, takes_arg=0,
				75	varnames=None):
				76	if type(tags) in (list, tuple):
				77	self.tags = tuple(tags)
				78	elif type(tags) is str:
				79	self.tags = (tags,)
				80	else: raise TypeError('Bad tags: %s' % tags)
				81	self.singular = label
				82	if plural is None: self.plural = label
				83	else: self.plural = plural
				84	self.multivalue = multivalue
				85	self.short = short
				86	self.takes_arg = takes_arg
				87	self.varnames = varnames or []
				88
				89	def __cmp__(self, other):
				90	if not isinstance(other, DocstringField): return -1
				91	return cmp(self.tags, other.tags)
				92
				93	def __hash__(self):
				94	return hash(self.tags)
				95
				96	def __repr__(self):
				97	return '<Field: %s>' % self.tags[0]
				98
# Each entry is built as DocstringField(tags, label[, plural], ...):
# the first tag in `tags` is the one shown by the field's repr, and
# `varnames` lists variables that may supply the field's value.
STANDARD_FIELDS = [
    #: A list of the standard simple fields accepted by epydoc.  This
    #: list can be augmented at run-time by a docstring with the special
    #: C{@deffield} field.  The order in which fields are listed here
    #: determines the order in which they will be displayed in the
    #: output.

    # If it's deprecated, put that first.
    DocstringField(['deprecated', 'depreciated'],
                   'Deprecated', multivalue=0, varnames=['__deprecated__']),

    # Status info
    DocstringField(['version'], 'Version', multivalue=0,
                   varnames=['__version__']),
    DocstringField(['date'], 'Date', multivalue=0,
                   varnames=['__date__']),
    DocstringField(['status'], 'Status', multivalue=0),

    # Bibliographic Info
    DocstringField(['author', 'authors'], 'Author', 'Authors', short=1,
                   varnames=['__author__', '__authors__']),
    DocstringField(['contact'], 'Contact', 'Contacts', short=1,
                   varnames=['__contact__']),
    DocstringField(['organization', 'org'],
                   'Organization', 'Organizations'),
    DocstringField(['copyright', '(c)'], 'Copyright', multivalue=0,
                   varnames=['__copyright__']),
    DocstringField(['license'], 'License', multivalue=0,
                   varnames=['__license__']),

    # Various warnings etc.
    DocstringField(['bug'], 'Bug', 'Bugs'),
    DocstringField(['warning', 'warn'], 'Warning', 'Warnings'),
    DocstringField(['attention'], 'Attention'),
    DocstringField(['note'], 'Note', 'Notes'),

    # Formal conditions
    DocstringField(['requires', 'require', 'requirement'], 'Requires'),
    DocstringField(['precondition', 'precond'],
                   'Precondition', 'Preconditions'),
    DocstringField(['postcondition', 'postcond'],
                   'Postcondition', 'Postconditions'),
    DocstringField(['invariant'], 'Invariant'),

    # When was it introduced (version # or date)
    DocstringField(['since'], 'Since', multivalue=0),

    # Changes made
    DocstringField(['change', 'changed'], 'Change Log'),

    # Crossreferences
    DocstringField(['see', 'seealso'], 'See Also', short=1),

    # Future Work
    DocstringField(['todo'], 'To Do', takes_arg=True),

    # Permissions (used by zope-based projects)
    DocstringField(['permission', 'permissions'], 'Permission', 'Permissions')
    ]
				158
				159	######################################################################
				160	#{ Docstring Parsing
				161	######################################################################
				162
				163	DEFAULT_DOCFORMAT = 'epytext'
				164	"""The name of the default markup languge used to process docstrings."""
				165
				166	# [xx] keep track of which ones we've already done, in case we're
				167	# asked to process one twice? e.g., for @include we might have to
				168	# parse the included docstring earlier than we might otherwise..??
				169
				170	def parse_docstring(api_doc, docindex, suppress_warnings=[]):
				171	"""
				172	Process the given C{APIDoc}'s docstring. In particular, populate
				173	the C{APIDoc}'s C{descr} and C{summary} attributes, and add any
				174	information provided by fields in the docstring.
				175
				176	@param docindex: A DocIndex, used to find the containing
				177	module (to look up the docformat); and to find any
				178	user docfields defined by containing objects.
				179	@param suppress_warnings: A set of objects for which docstring
				180	warnings should be suppressed.
				181	"""
				182	if api_doc.metadata is not UNKNOWN:
				183	if not (isinstance(api_doc, RoutineDoc)
				184	and api_doc.canonical_name[-1] == '__init__'):
				185	log.debug("%s's docstring processed twice" %
				186	api_doc.canonical_name)
				187	return
				188
				189	initialize_api_doc(api_doc)
				190
				191	# If there's no docstring, then check for special variables (e.g.,
				192	# __version__), and then return -- there's nothing else to do.
				193	if (api_doc.docstring in (None, UNKNOWN)):
				194	if isinstance(api_doc, NamespaceDoc):
				195	for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
				196	add_metadata_from_var(api_doc, field)
				197	return
				198
				199	# Remove leading indentation from the docstring.
				200	api_doc.docstring = unindent_docstring(api_doc.docstring)
				201
				202	# Decide which docformat is used by this module.
				203	docformat = get_docformat(api_doc, docindex)
				204
				205	# A list of markup errors from parsing.
				206	parse_errors = []
				207
				208	# Extract a signature from the docstring, if it has one. This
				209	# overrides any signature we got via introspection/parsing.
				210	if isinstance(api_doc, RoutineDoc):
				211	parse_function_signature(api_doc, None, docformat, parse_errors)
				212
				213	# Parse the docstring. Any errors encountered are stored as
				214	# `ParseError` objects in the errors list.
				215	parsed_docstring = markup.parse(api_doc.docstring, docformat,
				216	parse_errors)
				217
				218	# Divide the docstring into a description and a list of
				219	# fields.
				220	descr, fields = parsed_docstring.split_fields(parse_errors)
				221	api_doc.descr = descr
				222
				223	field_warnings = []
				224
				225	# Handle the constructor fields that have been defined in the class
				226	# docstring. This code assumes that a class docstring is parsed before
				227	# the same class __init__ docstring.
				228	if isinstance(api_doc, ClassDoc):
				229
				230	# Parse ahead the __init__ docstring for this class
				231	initvar = api_doc.variables.get('__init__')
				232	if initvar and isinstance(initvar.value, RoutineDoc):
				233	init_api_doc = initvar.value
				234	parse_docstring(init_api_doc, docindex, suppress_warnings)
				235
				236	parse_function_signature(init_api_doc, api_doc,
				237	docformat, parse_errors)
				238	init_fields = split_init_fields(fields, field_warnings)
				239
				240	# Process fields
				241	for field in init_fields:
				242	try:
				243	process_field(init_api_doc, docindex, field.tag(),
				244	field.arg(), field.body())
				245	except ValueError, e: field_warnings.append(str(e))
				246
				247	# Process fields
				248	for field in fields:
				249	try:
				250	process_field(api_doc, docindex, field.tag(),
				251	field.arg(), field.body())
				252	except ValueError, e: field_warnings.append(str(e))
				253
				254	# Check to make sure that all type parameters correspond to
				255	# some documented parameter.
				256	check_type_fields(api_doc, field_warnings)
				257
				258	# Check for special variables (e.g., __version__)
				259	if isinstance(api_doc, NamespaceDoc):
				260	for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
				261	add_metadata_from_var(api_doc, field)
				262
				263	# Extract a summary
				264	if api_doc.summary is None and api_doc.descr is not None:
				265	api_doc.summary, api_doc.other_docs = api_doc.descr.summary()
				266
				267	# If the summary is empty, but the return field is not, then use
				268	# the return field to generate a summary description.
				269	if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None and
				270	api_doc.return_descr is not None):
				271	s, o = api_doc.return_descr.summary()
				272	api_doc.summary = RETURN_PDS + s
				273	api_doc.other_docs = o
				274
				275	# [XX] Make sure we don't have types/param descrs for unknown
				276	# vars/params?
				277
				278	# Report any errors that occured
				279	if api_doc in suppress_warnings:
				280	if parse_errors or field_warnings:
				281	log.info("Suppressing docstring warnings for %s, since it "
				282	"is not included in the documented set." %
				283	api_doc.canonical_name)
				284	else:
				285	report_errors(api_doc, docindex, parse_errors, field_warnings)
				286
				287	def add_metadata_from_var(api_doc, field):
				288	for varname in field.varnames:
				289	# Check if api_doc has a variable w/ the given name.
				290	if varname not in api_doc.variables: continue
				291
				292	# Check moved here from before the for loop because we expect to
				293	# reach rarely this point. The loop below is to be performed more than
				294	# once only for fields with more than one varname, which currently is
				295	# only 'author'.
				296	for md in api_doc.metadata:
				297	if field == md[0]:
				298	return # We already have a value for this metadata.
				299
				300	var_doc = api_doc.variables[varname]
				301	if var_doc.value is UNKNOWN: continue
				302	val_doc = var_doc.value
				303	value = []
				304
				305	# Try extracting the value from the pyval.
				306	ok_types = (basestring, int, float, bool, type(None))
				307	if val_doc.pyval is not UNKNOWN:
				308	if isinstance(val_doc.pyval, ok_types):
				309	value = [val_doc.pyval]
				310	elif field.multivalue:
				311	if isinstance(val_doc.pyval, (tuple, list)):
				312	for elt in val_doc.pyval:
				313	if not isinstance(elt, ok_types): break
				314	else:
				315	value = list(val_doc.pyval)
				316
				317	# Try extracting the value from the parse tree.
				318	elif val_doc.toktree is not UNKNOWN:
				319	try: value = [epydoc.docparser.parse_string(val_doc.toktree)]
				320	except KeyboardInterrupt: raise
				321	except: pass
				322	if field.multivalue and not value:
				323	try: value = epydoc.docparser.parse_string_list(val_doc.toktree)
				324	except KeyboardInterrupt: raise
				325	except: raise
				326
				327	# Add any values that we found.
				328	for elt in value:
				329	if isinstance(elt, str):
				330	elt = decode_with_backslashreplace(elt)
				331	else:
				332	elt = unicode(elt)
				333	elt = epytext.ParsedEpytextDocstring(
				334	epytext.parse_as_para(elt), inline=True)
				335
				336	# Add in the metadata and remove from the variables
				337	api_doc.metadata.append( (field, varname, elt) )
				338
				339	# Remove the variable itself (unless it's documented)
				340	if var_doc.docstring in (None, UNKNOWN):
				341	del api_doc.variables[varname]
				342	if api_doc.sort_spec is not UNKNOWN:
				343	try: api_doc.sort_spec.remove(varname)
				344	except ValueError: pass
				345
				346	def initialize_api_doc(api_doc):
				347	"""A helper function for L{parse_docstring()} that initializes
				348	the attributes that C{parse_docstring()} will write to."""
				349	if api_doc.descr is UNKNOWN:
				350	api_doc.descr = None
				351	if api_doc.summary is UNKNOWN:
				352	api_doc.summary = None
				353	if api_doc.metadata is UNKNOWN:
				354	api_doc.metadata = []
				355	if isinstance(api_doc, RoutineDoc):
				356	if api_doc.arg_descrs is UNKNOWN:
				357	api_doc.arg_descrs = []
				358	if api_doc.arg_types is UNKNOWN:
				359	api_doc.arg_types = {}
				360	if api_doc.return_descr is UNKNOWN:
				361	api_doc.return_descr = None
				362	if api_doc.return_type is UNKNOWN:
				363	api_doc.return_type = None
				364	if api_doc.exception_descrs is UNKNOWN:
				365	api_doc.exception_descrs = []
				366	if isinstance(api_doc, (VariableDoc, PropertyDoc)):
				367	if api_doc.type_descr is UNKNOWN:
				368	api_doc.type_descr = None
				369	if isinstance(api_doc, NamespaceDoc):
				370	if api_doc.group_specs is UNKNOWN:
				371	api_doc.group_specs = []
				372	if api_doc.sort_spec is UNKNOWN:
				373	api_doc.sort_spec = []
				374
				375	def split_init_fields(fields, warnings):
				376	"""
				377	Remove the fields related to the constructor from a class docstring
				378	fields list.
				379
				380	@param fields: The fields to process. The list will be modified in place
				381	@type fields: C{list} of L{markup.Field}
				382	@param warnings: A list to emit processing warnings
				383	@type warnings: C{list}
				384	@return: The C{fields} items to be applied to the C{__init__} method
				385	@rtype: C{list} of L{markup.Field}
				386	"""
				387	init_fields = []
				388
				389	# Split fields in lists according to their argument, keeping order.
				390	arg_fields = {}
				391	args_order = []
				392	i = 0
				393	while i < len(fields):
				394	field = fields[i]
				395
				396	# gather together all the fields with the same arg
				397	if field.arg() is not None:
				398	arg_fields.setdefault(field.arg(), []).append(fields.pop(i))
				399	args_order.append(field.arg())
				400	else:
				401	i += 1
				402
				403	# Now check that for each argument there is at most a single variable
				404	# and a single parameter, and at most a single type for each of them.
				405	for arg in args_order:
				406	ff = arg_fields.pop(arg, None)
				407	if ff is None:
				408	continue
				409
				410	var = tvar = par = tpar = None
				411	for field in ff:
				412	if field.tag() in VARIABLE_TAGS:
				413	if var is None:
				414	var = field
				415	fields.append(field)
				416	else:
				417	warnings.append(
				418	"There is more than one variable named '%s'"
				419	% arg)
				420	elif field.tag() in PARAMETER_TAGS:
				421	if par is None:
				422	par = field
				423	init_fields.append(field)
				424	else:
				425	warnings.append(
				426	"There is more than one parameter named '%s'"
				427	% arg)
				428
				429	elif field.tag() == 'type':
				430	if var is None and par is None:
				431	# type before obj
				432	tvar = tpar = field
				433	else:
				434	if var is not None and tvar is None:
				435	tvar = field
				436	if par is not None and tpar is None:
				437	tpar = field
				438
				439	elif field.tag() in EXCEPTION_TAGS:
				440	init_fields.append(field)
				441
				442	else: # Unespected field
				443	fields.append(field)
				444
				445	# Put selected types into the proper output lists
				446	if tvar is not None:
				447	if var is not None:
				448	fields.append(tvar)
				449	else:
				450	pass # [xx] warn about type w/o object?
				451
				452	if tpar is not None:
				453	if par is not None:
				454	init_fields.append(tpar)
				455	else:
				456	pass # [xx] warn about type w/o object?
				457
				458	return init_fields
				459
				460	def report_errors(api_doc, docindex, parse_errors, field_warnings):
				461	"""A helper function for L{parse_docstring()} that reports any
				462	markup warnings and field warnings that we encountered while
				463	processing C{api_doc}'s docstring."""
				464	if not parse_errors and not field_warnings: return
				465
				466	# Get the name of the item containing the error, and the
				467	# filename of its containing module.
				468	name = api_doc.canonical_name
				469	module = api_doc.defining_module
				470	if module is not UNKNOWN and module.filename not in (None, UNKNOWN):
				471	try: filename = py_src_filename(module.filename)
				472	except: filename = module.filename
				473	else:
				474	filename = '??'
				475
				476	# [xx] Don't report markup errors for standard builtins.
				477	# n.b. that we must use 'is' to compare pyvals here -- if we use
				478	# 'in' or '==', then a user __cmp__ method might raise an
				479	# exception, or lie.
				480	if isinstance(api_doc, ValueDoc) and api_doc != module:
				481	if module not in (None, UNKNOWN) and module.pyval is exceptions:
				482	return
				483	for builtin_val in __builtin__.__dict__.values():
				484	if builtin_val is api_doc.pyval:
				485	return
				486
				487	# Get the start line of the docstring containing the error.
				488	startline = api_doc.docstring_lineno
				489	if startline in (None, UNKNOWN):
				490	startline = introspect_docstring_lineno(api_doc)
				491	if startline in (None, UNKNOWN):
				492	startline = None
				493
				494	# Display a block header.
				495	header = 'File %s, ' % filename
				496	if startline is not None:
				497	header += 'line %d, ' % startline
				498	header += 'in %s' % name
				499	log.start_block(header)
				500
				501
				502	# Display all parse errors. But first, combine any errors
				503	# with duplicate description messages.
				504	if startline is None:
				505	# remove dups, but keep original order:
				506	dups = {}
				507	for error in parse_errors:
				508	message = error.descr()
				509	if message not in dups:
				510	log.docstring_warning(message)
				511	dups[message] = 1
				512	else:
				513	# Combine line number fields for dup messages:
				514	messages = {} # maps message -> list of linenum
				515	for error in parse_errors:
				516	error.set_linenum_offset(startline)
				517	message = error.descr()
				518	messages.setdefault(message, []).append(error.linenum())
				519	message_items = messages.items()
				520	message_items.sort(lambda a,b:cmp(min(a[1]), min(b[1])))
				521	for message, linenums in message_items:
				522	linenums = [n for n in linenums if n is not None]
				523	if len(linenums) == 0:
				524	log.docstring_warning(message)
				525	elif len(linenums) == 1:
				526	log.docstring_warning("Line %s: %s" % (linenums[0], message))
				527	else:
				528	linenums = ', '.join(['%s' % l for l in linenums])
				529	log.docstring_warning("Lines %s: %s" % (linenums, message))
				530
				531	# Display all field warnings.
				532	for warning in field_warnings:
				533	log.docstring_warning(warning)
				534
				535	# End the message block.
				536	log.end_block()
				537
# Parsed once at import time from the epytext source 'Returns:'.
RETURN_PDS = markup.parse('Returns:', markup='epytext')
"""A ParsedDocstring containing the text 'Returns'.  This is used to
construct summary descriptions for routines that have empty C{descr},
but non-empty C{return_descr}."""
# Flag the first child of the parsed tree as inline.
# NOTE(review): presumably so that the text following it in a summary
# stays on the same line -- confirm against the epytext renderer.
RETURN_PDS._tree.children[0].attribs['inline'] = True
				543
				544	######################################################################
				545	#{ Field Processing Error Messages
				546	######################################################################
				547
# Message templates for field-processing problems.  Each one is
# filled in with the '%' operator; BAD_PARAM takes two values, the
# others take one.
UNEXPECTED_ARG = '%r did not expect an argument'
EXPECTED_ARG = '%r expected an argument'
EXPECTED_SINGLE_ARG = '%r expected a single argument'
BAD_CONTEXT = 'Invalid context for %r'
REDEFINED = 'Redefinition of %s'
UNKNOWN_TAG = 'Unknown field tag %r'
BAD_PARAM = '@%s for unknown parameter %s'
				555
				556	######################################################################
				557	#{ Field Processing
				558	######################################################################
				559
				560	def process_field(api_doc, docindex, tag, arg, descr):
				561	"""
				562	Process a single field, and use it to update C{api_doc}. If
				563	C{tag} is the name of a special field, then call its handler
				564	function. If C{tag} is the name of a simple field, then use
				565	C{process_simple_field} to process it. Otherwise, check if it's a
				566	user-defined field, defined in this docstring or the docstring of
				567	a containing object; and if so, process it with
				568	C{process_simple_field}.
				569
				570	@param tag: The field's tag, such as C{'author'}
				571	@param arg: The field's optional argument
				572	@param descr: The description following the field tag and
				573	argument.
				574	@raise ValueError: If a problem was encountered while processing
				575	the field. The C{ValueError}'s string argument is an
				576	explanation of the problem, which should be displayed as a
				577	warning message.
				578	"""
				579	# standard special fields
				580	if tag in _field_dispatch_table:
				581	handler = _field_dispatch_table[tag]
				582	handler(api_doc, docindex, tag, arg, descr)
				583	return
				584
				585	# standard simple fields & user-defined fields
				586	for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
				587	if tag in field.tags:
				588	# [xx] check if it's redefined if it's not multivalue??
				589	if not field.takes_arg:
				590	_check(api_doc, tag, arg, expect_arg=False)
				591	api_doc.metadata.append((field, arg, descr))
				592	return
				593
				594	# If we didn't handle the field, then report a warning.
				595	raise ValueError(UNKNOWN_TAG % tag)
				596
				597	def user_docfields(api_doc, docindex):
				598	"""
				599	Return a list of user defined fields that can be used for the
				600	given object. This list is taken from the given C{api_doc}, and
				601	any of its containing C{NamepaceDoc}s.
				602
				603	@note: We assume here that a parent's docstring will always be
				604	parsed before its childrens'. This is indeed the case when we
				605	are called via L{docbuilder.build_doc_index()}. If a child's
				606	docstring is parsed before its parents, then its parent won't
				607	yet have had its C{extra_docstring_fields} attribute
				608	initialized.
				609	"""
				610	docfields = []
				611	# Get any docfields from `api_doc` itself
				612	if api_doc.extra_docstring_fields not in (None, UNKNOWN):
				613	docfields += api_doc.extra_docstring_fields
				614	# Get any docfields from `api_doc`'s ancestors
				615	for i in range(len(api_doc.canonical_name)-1, 0, -1):
				616	ancestor = docindex.get_valdoc(api_doc.canonical_name[:i])
				617	if ancestor is not None \
				618	and ancestor.extra_docstring_fields not in (None, UNKNOWN):
				619	docfields += ancestor.extra_docstring_fields
				620	return docfields
				621
				622	_field_dispatch_table = {}
				623	def register_field_handler(handler, *field_tags):
				624	"""
				625	Register the given field handler function for processing any
				626	of the given field tags. Field handler functions should
				627	have the following signature:
				628
				629	>>> def field_handler(api_doc, docindex, tag, arg, descr):
				630	... '''update api_doc in response to the field.'''
				631
				632	Where C{api_doc} is the documentation object to update;
				633	C{docindex} is a L{DocIndex} that can be used to look up the
				634	documentation for related objects; C{tag} is the field tag that
				635	was used; C{arg} is the optional argument; and C{descr} is the
				636	description following the field tag and argument.
				637	"""
				638	for field_tag in field_tags:
				639	_field_dispatch_table[field_tag] = handler
				640
				641	######################################################################
				642	#{ Field Handler Functions
				643	######################################################################
				644
				645	def process_summary_field(api_doc, docindex, tag, arg, descr):
				646	"""Store C{descr} in C{api_doc.summary}"""
				647	_check(api_doc, tag, arg, expect_arg=False)
				648	if api_doc.summary is not None:
				649	raise ValueError(REDEFINED % tag)
				650	api_doc.summary = descr
				651
				652	def process_include_field(api_doc, docindex, tag, arg, descr):
				653	"""Copy the docstring contents from the object named in C{descr}"""
				654	_check(api_doc, tag, arg, expect_arg=False)
				655	# options:
				656	# a. just append the descr to our own
				657	# b. append descr and update metadata
				658	# c. append descr and process all fields.
				659	# in any case, mark any errors we may find as coming from an
				660	# imported docstring.
				661
				662	# how does this interact with documentation inheritance??
				663	raise ValueError('%s not implemented yet' % tag)
				664
				665	def process_undocumented_field(api_doc, docindex, tag, arg, descr):
				666	"""Remove any documentation for the variables named in C{descr}"""
				667	_check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
				668	for ident in _descr_to_identifiers(descr):
				669	var_name_re = re.compile('^%s$' % ident.replace('', '(.)'))
				670	for var_name, var_doc in api_doc.variables.items():
				671	if var_name_re.match(var_name):
				672	# Remove the variable from `variables`.
				673	api_doc.variables.pop(var_name, None)
				674	if api_doc.sort_spec is not UNKNOWN:
				675	try: api_doc.sort_spec.remove(var_name)
				676	except ValueError: pass
				677	# For modules, remove any submodules that match var_name_re.
				678	if isinstance(api_doc, ModuleDoc):
				679	removed = set([m for m in api_doc.submodules
				680	if var_name_re.match(m.canonical_name[-1])])
				681	if removed:
				682	# Remove the indicated submodules from this module.
				683	api_doc.submodules = [m for m in api_doc.submodules
				684	if m not in removed]
				685	# Remove all ancestors of the indicated submodules
				686	# from the docindex root. E.g., if module x
				687	# declares y to be undocumented, then x.y.z should
				688	# also be undocumented.
				689	for elt in docindex.root[:]:
				690	for m in removed:
				691	if m.canonical_name.dominates(elt.canonical_name):
				692	docindex.root.remove(elt)
				693
				694	def process_group_field(api_doc, docindex, tag, arg, descr):
				695	"""Define a group named C{arg} containing the variables whose
				696	names are listed in C{descr}."""
				697	_check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=True)
				698	api_doc.group_specs.append( (arg, _descr_to_identifiers(descr)) )
				699	# [xx] should this also set sort order?
				700
				701	def process_deffield_field(api_doc, docindex, tag, arg, descr):
				702	"""Define a new custom field."""
				703	_check(api_doc, tag, arg, expect_arg=True)
				704	if api_doc.extra_docstring_fields is UNKNOWN:
				705	api_doc.extra_docstring_fields = []
				706	try:
				707	docstring_field = _descr_to_docstring_field(arg, descr)
				708	docstring_field.varnames.append("__%s__" % arg)
				709	api_doc.extra_docstring_fields.append(docstring_field)
				710	except ValueError, e:
				711	raise ValueError('Bad %s: %s' % (tag, e))
				712
				713	def process_raise_field(api_doc, docindex, tag, arg, descr):
				714	"""Record the fact that C{api_doc} can raise the exception named
				715	C{tag} in C{api_doc.exception_descrs}."""
				716	_check(api_doc, tag, arg, context=RoutineDoc, expect_arg='single')
				717	try: name = DottedName(arg, strict=True)
				718	except DottedName.InvalidDottedName: name = arg
				719	api_doc.exception_descrs.append( (name, descr) )
				720
				721	def process_sort_field(api_doc, docindex, tag, arg, descr):
				722	_check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
				723	api_doc.sort_spec = _descr_to_identifiers(descr) + api_doc.sort_spec
				724
				725	# [xx] should I notice when they give a type for an unknown var?
				726	def process_type_field(api_doc, docindex, tag, arg, descr):
				727	# In namespace, "@type var: ..." describes the type of a var.
				728	if isinstance(api_doc, NamespaceDoc):
				729	_check(api_doc, tag, arg, expect_arg='single')
				730	set_var_type(api_doc, arg, descr)
				731
				732	# For variables & properties, "@type: ..." describes the variable.
				733	elif isinstance(api_doc, (VariableDoc, PropertyDoc)):
				734	_check(api_doc, tag, arg, expect_arg=False)
				735	if api_doc.type_descr is not None:
				736	raise ValueError(REDEFINED % tag)
				737	api_doc.type_descr = descr
				738
				739	# For routines, "@type param: ..." describes a parameter.
				740	elif isinstance(api_doc, RoutineDoc):
				741	_check(api_doc, tag, arg, expect_arg='single')
				742	if arg in api_doc.arg_types:
				743	raise ValueError(REDEFINED % ('type for '+arg))
				744	api_doc.arg_types[arg] = descr
				745
				746	else:
				747	raise ValueError(BAD_CONTEXT % tag)
				748
				749	def process_var_field(api_doc, docindex, tag, arg, descr):
				750	_check(api_doc, tag, arg, context=ModuleDoc, expect_arg=True)
				751	for ident in re.split('[:;, ] *', arg):
				752	set_var_descr(api_doc, ident, descr)
				753
				754	def process_cvar_field(api_doc, docindex, tag, arg, descr):
				755	# If @cvar is used within a variable, then use it as the
				756	# variable's description, and treat the variable as a class var.
				757	if (isinstance(api_doc, VariableDoc) and
				758	isinstance(api_doc.container, ClassDoc)):
				759	_check(api_doc, tag, arg, expect_arg=False)
				760	api_doc.is_instvar = False
				761	api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
				762	api_doc.summary, api_doc.other_docs = descr.summary()
				763
				764	# Otherwise, @cvar should be used in a class.
				765	else:
				766	_check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
				767	for ident in re.split('[:;, ] *', arg):
				768	set_var_descr(api_doc, ident, descr)
				769	api_doc.variables[ident].is_instvar = False
				770
				771	def process_ivar_field(api_doc, docindex, tag, arg, descr):
				772	# If @ivar is used within a variable, then use it as the
				773	# variable's description, and treat the variable as an instvar.
				774	if (isinstance(api_doc, VariableDoc) and
				775	isinstance(api_doc.container, ClassDoc)):
				776	_check(api_doc, tag, arg, expect_arg=False)
				777	# require that there be no other descr?
				778	api_doc.is_instvar = True
				779	api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
				780	api_doc.summary, api_doc.other_docs = descr.summary()
				781
				782	# Otherwise, @ivar should be used in a class.
				783	else:
				784	_check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
				785	for ident in re.split('[:;, ] *', arg):
				786	set_var_descr(api_doc, ident, descr)
				787	api_doc.variables[ident].is_instvar = True
				788
				789	# [xx] '@return: foo' used to get used as a descr if no other
				790	# descr was present. is that still true?
				791	def process_return_field(api_doc, docindex, tag, arg, descr):
				792	_check(api_doc, tag, arg, context=RoutineDoc, expect_arg=False)
				793	if api_doc.return_descr is not None:
				794	raise ValueError(REDEFINED % 'return value description')
				795	api_doc.return_descr = descr
				796
				797	def process_rtype_field(api_doc, docindex, tag, arg, descr):
				798	_check(api_doc, tag, arg,
				799	context=(RoutineDoc, PropertyDoc), expect_arg=False)
				800	if isinstance(api_doc, RoutineDoc):
				801	if api_doc.return_type is not None:
				802	raise ValueError(REDEFINED % 'return value type')
				803	api_doc.return_type = descr
				804
				805	elif isinstance(api_doc, PropertyDoc):
				806	_check(api_doc, tag, arg, expect_arg=False)
				807	if api_doc.type_descr is not None:
				808	raise ValueError(REDEFINED % tag)
				809	api_doc.type_descr = descr
				810
				811	def process_arg_field(api_doc, docindex, tag, arg, descr):
				812	_check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
				813	idents = re.split('[:;, ] *', arg)
				814	api_doc.arg_descrs.append( (idents, descr) )
				815	# Check to make sure that the documented parameter(s) are
				816	# actually part of the function signature.
				817	all_args = api_doc.all_args()
				818	if all_args not in (['...'], UNKNOWN):
				819	bad_params = ['"%s"' % i for i in idents if i not in all_args]
				820	if bad_params:
				821	raise ValueError(BAD_PARAM % (tag, ', '.join(bad_params)))
				822
				823	def process_kwarg_field(api_doc, docindex, tag, arg, descr):
				824	# [xx] these should -not- be checked if they exist..
				825	# and listed separately or not??
				826	_check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
				827	idents = re.split('[:;, ] *', arg)
				828	api_doc.arg_descrs.append( (idents, descr) )
				829
				830	register_field_handler(process_group_field, 'group')
				831	register_field_handler(process_deffield_field, 'deffield', 'newfield')
				832	register_field_handler(process_sort_field, 'sort')
				833	register_field_handler(process_summary_field, 'summary')
				834	register_field_handler(process_undocumented_field, 'undocumented')
				835	register_field_handler(process_include_field, 'include')
				836	register_field_handler(process_var_field, 'var', 'variable')
				837	register_field_handler(process_type_field, 'type')
				838	register_field_handler(process_cvar_field, 'cvar', 'cvariable')
				839	register_field_handler(process_ivar_field, 'ivar', 'ivariable')
				840	register_field_handler(process_return_field, 'return', 'returns')
				841	register_field_handler(process_rtype_field, 'rtype', 'returntype')
				842	register_field_handler(process_arg_field, 'arg', 'argument',
				843	'parameter', 'param')
				844	register_field_handler(process_kwarg_field, 'kwarg', 'keyword', 'kwparam')
				845	register_field_handler(process_raise_field, 'raise', 'raises',
				846	'except', 'exception')
				847
				848	# Tags related to function parameters
				849	PARAMETER_TAGS = ('arg', 'argument', 'parameter', 'param',
				850	'kwarg', 'keyword', 'kwparam')
				851
				852	# Tags related to variables in a class
				853	VARIABLE_TAGS = ('cvar', 'cvariable', 'ivar', 'ivariable')
				854
				855	# Tags related to exceptions
				856	EXCEPTION_TAGS = ('raise', 'raises', 'except', 'exception')
				857
				858	######################################################################
				859	#{ Helper Functions
				860	######################################################################
				861
				862	def check_type_fields(api_doc, field_warnings):
				863	"""Check to make sure that all type fields correspond to some
				864	documented parameter; if not, append a warning to field_warnings."""
				865	if isinstance(api_doc, RoutineDoc):
				866	for arg in api_doc.arg_types:
				867	if arg not in api_doc.all_args():
				868	for args, descr in api_doc.arg_descrs:
				869	if arg in args:
				870	break
				871	else:
				872	field_warnings.append(BAD_PARAM % ('type', '"%s"' % arg))
				873
				874	def set_var_descr(api_doc, ident, descr):
				875	if ident not in api_doc.variables:
				876	api_doc.variables[ident] = VariableDoc(
				877	container=api_doc, name=ident,
				878	canonical_name=api_doc.canonical_name+ident)
				879
				880	var_doc = api_doc.variables[ident]
				881	if var_doc.descr not in (None, UNKNOWN):
				882	raise ValueError(REDEFINED % ('description for '+ident))
				883	var_doc.descr = descr
				884	if var_doc.summary in (None, UNKNOWN):
				885	var_doc.summary, var_doc.other_docs = var_doc.descr.summary()
				886
				887	def set_var_type(api_doc, ident, descr):
				888	if ident not in api_doc.variables:
				889	api_doc.variables[ident] = VariableDoc(
				890	container=api_doc, name=ident,
				891	canonical_name=api_doc.canonical_name+ident)
				892
				893	var_doc = api_doc.variables[ident]
				894	if var_doc.type_descr not in (None, UNKNOWN):
				895	raise ValueError(REDEFINED % ('type for '+ident))
				896	var_doc.type_descr = descr
				897
				898	def _check(api_doc, tag, arg, context=None, expect_arg=None):
				899	if context is not None:
				900	if not isinstance(api_doc, context):
				901	raise ValueError(BAD_CONTEXT % tag)
				902	if expect_arg is not None:
				903	if expect_arg == True:
				904	if arg is None:
				905	raise ValueError(EXPECTED_ARG % tag)
				906	elif expect_arg == False:
				907	if arg is not None:
				908	raise ValueError(UNEXPECTED_ARG % tag)
				909	elif expect_arg == 'single':
				910	if (arg is None or ' ' in arg):
				911	raise ValueError(EXPECTED_SINGLE_ARG % tag)
				912	else:
				913	assert 0, 'bad value for expect_arg'
				914
				915	def get_docformat(api_doc, docindex):
				916	"""
				917	Return the name of the markup language that should be used to
				918	parse the API documentation for the given object.
				919	"""
				920	# Find the module that defines api_doc.
				921	module = api_doc.defining_module
				922	# Look up its docformat.
				923	if module is not UNKNOWN and module.docformat not in (None, UNKNOWN):
				924	docformat = module.docformat
				925	else:
				926	docformat = DEFAULT_DOCFORMAT
				927	# Convert to lower case & strip region codes.
				928	try: return docformat.lower().split()[0]
				929	except: return DEFAULT_DOCFORMAT
				930
				931	def unindent_docstring(docstring):
				932	# [xx] copied from inspect.getdoc(); we can't use inspect.getdoc()
				933	# itself, since it expects an object, not a string.
				934
				935	if not docstring: return ''
				936	lines = docstring.expandtabs().split('\n')
				937
				938	# Find minimum indentation of any non-blank lines after first line.
				939	margin = sys.maxint
				940	for line in lines[1:]:
				941	content = len(line.lstrip())
				942	if content:
				943	indent = len(line) - content
				944	margin = min(margin, indent)
				945	# Remove indentation.
				946	if lines:
				947	lines[0] = lines[0].lstrip()
				948	if margin < sys.maxint:
				949	for i in range(1, len(lines)): lines[i] = lines[i][margin:]
				950	# Remove any trailing (but not leading!) blank lines.
				951	while lines and not lines[-1]:
				952	lines.pop()
				953	#while lines and not lines[0]:
				954	# lines.pop(0)
				955	return '\n'.join(lines)
				956
				957	_IDENTIFIER_LIST_REGEXP = re.compile(r'^[\w.\]+([\s,:;]\s[\w.\]+)$')
				958	def _descr_to_identifiers(descr):
				959	"""
				960	Given a C{ParsedDocstring} that contains a list of identifiers,
				961	return a list of those identifiers. This is used by fields such
				962	as C{@group} and C{@sort}, which expect lists of identifiers as
				963	their values. To extract the identifiers, the docstring is first
				964	converted to plaintext, and then split. The plaintext content of
				965	the docstring must be a a list of identifiers, separated by
				966	spaces, commas, colons, or semicolons.
				967
				968	@rtype: C{list} of C{string}
				969	@return: A list of the identifier names contained in C{descr}.
				970	@type descr: L{markup.ParsedDocstring}
				971	@param descr: A C{ParsedDocstring} containing a list of
				972	identifiers.
				973	@raise ValueError: If C{descr} does not contain a valid list of
				974	identifiers.
				975	"""
				976	idents = descr.to_plaintext(None).strip()
				977	idents = re.sub(r'\s+', ' ', idents)
				978	if not _IDENTIFIER_LIST_REGEXP.match(idents):
				979	raise ValueError, 'Bad Identifier list: %r' % idents
				980	rval = re.split('[:;, ] *', idents)
				981	return rval
				982
				983	def _descr_to_docstring_field(arg, descr):
				984	tags = [s.lower() for s in re.split('[:;, ] *', arg)]
				985	descr = descr.to_plaintext(None).strip()
				986	args = re.split('[:;,] *', descr)
				987	if len(args) == 0 or len(args) > 3:
				988	raise ValueError, 'Wrong number of arguments'
				989	singular = args[0]
				990	if len(args) >= 2: plural = args[1]
				991	else: plural = None
				992	short = 0
				993	if len(args) >= 3:
				994	if args[2] == 'short': short = 1
				995	else: raise ValueError('Bad arg 2 (expected "short")')
				996	return DocstringField(tags, singular, plural, short)
				997
				998	######################################################################
				999	#{ Function Signature Extraction
				1000	######################################################################
				1001
				1002	# [XX] todo: add optional type modifiers?
				1003	_SIGNATURE_RE = re.compile(
				1004	# Class name (for builtin methods)
				1005	r'^\s*((?P<self>\w+)\.)?' +
				1006	# The function name (must match exactly) [XX] not anymore!
				1007	r'(?P<func>\w+)' +
				1008	# The parameters
				1009	r'\((?P<params>(\s\[?\s\{0,2}[\w\-\.]+(\s=.+?)?'+
				1010	r'(\s\[?\s,\s\]?\s\{0,2}[\w\-\.]+(\s=.+?)?)\])?)\s*\)' +
				1011	# The return value (optional)
				1012	r'(\s(->)\s(?P<return>\S.*?))?'+
				1013	# The end marker
				1014	r'\s*(\n\|\s+(--\|<=+>)\s+\|$\|\.\s+\|\.\n)')
				1015	"""A regular expression that is used to extract signatures from
				1016	docstrings."""
				1017
				1018	def parse_function_signature(func_doc, doc_source, docformat, parse_errors):
				1019	"""
				1020	Construct the signature for a builtin function or method from
				1021	its docstring. If the docstring uses the standard convention
				1022	of including a signature in the first line of the docstring
				1023	(and formats that signature according to standard
				1024	conventions), then it will be used to extract a signature.
				1025	Otherwise, the signature will be set to a single varargs
				1026	variable named C{"..."}.
				1027
				1028	@param func_doc: The target object where to store parsed signature. Also
				1029	container of the docstring to parse if doc_source is C{None}
				1030	@type func_doc: L{RoutineDoc}
				1031	@param doc_source: Contains the docstring to parse. If C{None}, parse
				1032	L{func_doc} docstring instead
				1033	@type doc_source: L{APIDoc}
				1034	@rtype: C{None}
				1035	"""
				1036	if doc_source is None:
				1037	doc_source = func_doc
				1038
				1039	# If there's no docstring, then don't do anything.
				1040	if not doc_source.docstring: return False
				1041
				1042	m = _SIGNATURE_RE.match(doc_source.docstring)
				1043	if m is None: return False
				1044
				1045	# Do I want to be this strict?
				1046	# Notice that __init__ must match the class name instead, if the signature
				1047	# comes from the class docstring
				1048	# if not (m.group('func') == func_doc.canonical_name[-1] or
				1049	# '_'+m.group('func') == func_doc.canonical_name[-1]):
				1050	# log.warning("Not extracting function signature from %s's "
				1051	# "docstring, since the name doesn't match." %
				1052	# func_doc.canonical_name)
				1053	# return False
				1054
				1055	params = m.group('params')
				1056	rtype = m.group('return')
				1057	selfparam = m.group('self')
				1058
				1059	# Extract the parameters from the signature.
				1060	func_doc.posargs = []
				1061	func_doc.vararg = None
				1062	func_doc.kwarg = None
				1063	if func_doc.posarg_defaults is UNKNOWN:
				1064	func_doc.posarg_defaults = []
				1065	if params:
				1066	# Figure out which parameters are optional.
				1067	while '[' in params or ']' in params:
				1068	m2 = re.match(r'(.)\[([^\[\]]+)\](.)', params)
				1069	if not m2: return False
				1070	(start, mid, end) = m2.groups()
				1071	mid = re.sub(r'((,\|^)\s*[\w\-\.]+)', r'\1=...', mid)
				1072	params = start+mid+end
				1073
				1074	params = re.sub(r'=...=' , r'=', params)
				1075	for name in params.split(','):
				1076	if '=' in name:
				1077	(name, default_repr) = name.split('=',1)
				1078	default = GenericValueDoc(parse_repr=default_repr)
				1079	else:
				1080	default = None
				1081	name = name.strip()
				1082	if name == '...':
				1083	func_doc.vararg = '...'
				1084	elif name.startswith('**'):
				1085	func_doc.kwarg = name[2:]
				1086	elif name.startswith('*'):
				1087	func_doc.vararg = name[1:]
				1088	else:
				1089	func_doc.posargs.append(name)
				1090	if len(func_doc.posarg_defaults) < len(func_doc.posargs):
				1091	func_doc.posarg_defaults.append(default)
				1092	elif default is not None:
				1093	argnum = len(func_doc.posargs)-1
				1094	func_doc.posarg_defaults[argnum] = default
				1095
				1096	# Extract the return type/value from the signature
				1097	if rtype:
				1098	func_doc.return_type = markup.parse(rtype, docformat, parse_errors,
				1099	inline=True)
				1100
				1101	# Add the self parameter, if it was specified.
				1102	if selfparam:
				1103	func_doc.posargs.insert(0, selfparam)
				1104	func_doc.posarg_defaults.insert(0, None)
				1105
				1106	# Remove the signature from the docstring.
				1107	doc_source.docstring = doc_source.docstring[m.end():]
				1108
				1109	# We found a signature.
				1110	return True
				1111