blob: b609bc9d246befcd29b690e9ee2482c63a7f722a [file] [log] [blame]
Tor Norbye3a2425a2013-11-04 10:16:08 -08001# epydoc -- Docstring processing
2#
3# Copyright (C) 2005 Edward Loper
4# Author: Edward Loper <edloper@loper.org>
5# URL: <http://epydoc.sf.net>
6#
7# $Id: docstringparser.py 1689 2008-01-30 17:01:02Z edloper $
8
9"""
10Parse docstrings and handle any fields they define, such as C{@type}
11and C{@author}. Fields are used to describe specific information
12about an object. There are two classes of fields: X{simple fields}
13and X{special fields}.
14
15Simple fields are fields that get stored directly in an C{APIDoc}'s
16metadata dictionary, without any special processing. The set of
17simple fields is defined by the list L{STANDARD_FIELDS}, whose
18elements are L{DocstringField}s.
19
20Special fields are fields that perform some sort of processing on the
21C{APIDoc}, or add information to attributes other than the metadata
22dictionary. Special fields are handled by field handler
23functions, which are registered using L{register_field_handler}.
24"""
25__docformat__ = 'epytext en'
26
27
28######################################################################
29## Imports
30######################################################################
31
32import re, sys
33from epydoc import markup
34from epydoc.markup import epytext
35from epydoc.apidoc import *
36from epydoc.docintrospecter import introspect_docstring_lineno
37from epydoc.util import py_src_filename
38from epydoc import log
39import epydoc.docparser
40import __builtin__, exceptions
41
42######################################################################
43# Docstring Fields
44######################################################################
45
class DocstringField:
    """
    A simple docstring field, which can be used to describe specific
    information about an object, such as its author or its version.
    Simple docstring fields are fields that take no arguments, and
    are displayed as simple sections.

    @ivar tags: The set of tags that can be used to identify this
        field.
    @ivar singular: The label that should be used to identify this
        field in the output, if the field contains one value.
    @ivar plural: The label that should be used to identify this
        field in the output, if the field contains multiple values.
    @ivar short: If true, then multiple values should be combined
        into a single comma-delimited list.  If false, then
        multiple values should be listed separately in a bulleted
        list.
    @ivar multivalue: If true, then multiple values may be given
        for this field; if false, then this field can only take a
        single value, and a warning should be issued if it is
        redefined.
    @ivar takes_arg: If true, then this field expects an argument;
        and a separate field section will be constructed for each
        argument value.  The label (and plural label) should include
        a '%s' to mark where the argument's string rep should be
        added.
    @ivar varnames: The names of variables (such as C{__version__})
        whose values may also be used to fill in this field; an
        empty list if none were given.
    """
    def __init__(self, tags, label, plural=None,
                 short=0, multivalue=1, takes_arg=0,
                 varnames=None):
        """
        @param tags: A single tag string, or a list/tuple of tag
            strings.
        @param label: The singular label; it is also used as the
            plural label when C{plural} is C{None}.
        @raise TypeError: If C{tags} is neither a string nor a
            list/tuple.
        """
        # isinstance (rather than an exact type() match) also accepts
        # subclasses of list, tuple and str.
        if isinstance(tags, (list, tuple)):
            self.tags = tuple(tags)
        elif isinstance(tags, str):
            self.tags = (tags,)
        else:
            # Wrap in a 1-tuple so the formatting itself cannot fail.
            raise TypeError('Bad tags: %s' % (tags,))
        self.singular = label
        if plural is None:
            self.plural = label
        else:
            self.plural = plural
        self.multivalue = multivalue
        self.short = short
        self.takes_arg = takes_arg
        self.varnames = varnames or []

    def __cmp__(self, other):
        # Fields are ordered and compared by their tag tuples; anything
        # that is not a DocstringField compares as "greater".
        if not isinstance(other, DocstringField): return -1
        return cmp(self.tags, other.tags)

    def __hash__(self):
        # Consistent with __cmp__: equal tags give equal hashes.
        return hash(self.tags)

    def __repr__(self):
        return '<Field: %s>' % self.tags[0]
98
STANDARD_FIELDS = [
    #: A list of the standard simple fields accepted by epydoc.  This
    #: list can be augmented at run-time by a docstring with the special
    #: C{@deffield} field.  The order in which fields are listed here
    #: determines the order in which they will be displayed in the
    #: output.

    # Deprecation notices come first.
    DocstringField(('deprecated', 'depreciated'), 'Deprecated',
                   multivalue=0, varnames=['__deprecated__']),

    # Status information.
    DocstringField('version', 'Version',
                   multivalue=0, varnames=['__version__']),
    DocstringField('date', 'Date',
                   multivalue=0, varnames=['__date__']),
    DocstringField('status', 'Status', multivalue=0),

    # Bibliographic information.
    DocstringField(('author', 'authors'), 'Author', 'Authors',
                   short=1, varnames=['__author__', '__authors__']),
    DocstringField('contact', 'Contact', 'Contacts',
                   short=1, varnames=['__contact__']),
    DocstringField(('organization', 'org'),
                   'Organization', 'Organizations'),
    DocstringField(('copyright', '(c)'), 'Copyright',
                   multivalue=0, varnames=['__copyright__']),
    DocstringField('license', 'License',
                   multivalue=0, varnames=['__license__']),

    # Warnings and similar notices.
    DocstringField('bug', 'Bug', 'Bugs'),
    DocstringField(('warning', 'warn'), 'Warning', 'Warnings'),
    DocstringField('attention', 'Attention'),
    DocstringField('note', 'Note', 'Notes'),

    # Formal conditions.
    DocstringField(('requires', 'require', 'requirement'), 'Requires'),
    DocstringField(('precondition', 'precond'),
                   'Precondition', 'Preconditions'),
    DocstringField(('postcondition', 'postcond'),
                   'Postcondition', 'Postconditions'),
    DocstringField('invariant', 'Invariant'),

    # When the object was introduced (version number or date).
    DocstringField('since', 'Since', multivalue=0),

    # Change log.
    DocstringField(('change', 'changed'), 'Change Log'),

    # Cross-references.
    DocstringField(('see', 'seealso'), 'See Also', short=1),

    # Future work.
    DocstringField('todo', 'To Do', takes_arg=True),

    # Permissions (used by zope-based projects).
    DocstringField(('permission', 'permissions'),
                   'Permission', 'Permissions'),
    ]
158
159######################################################################
160#{ Docstring Parsing
161######################################################################
162
DEFAULT_DOCFORMAT = 'epytext'
"""The name of the default markup language used to process docstrings."""
165
166# [xx] keep track of which ones we've already done, in case we're
167# asked to process one twice? e.g., for @include we might have to
168# parse the included docstring earlier than we might otherwise..??
169
def parse_docstring(api_doc, docindex, suppress_warnings=[]):
    """
    Process the given C{APIDoc}'s docstring.  In particular, populate
    the C{APIDoc}'s C{descr} and C{summary} attributes, and add any
    information provided by fields in the docstring.

    For a C{ClassDoc} whose C{__init__} variable holds a C{RoutineDoc},
    that routine's docstring is parsed first, and the
    constructor-related fields found in the class docstring are
    applied to the routine instead of the class.

    @param api_doc: The documentation object whose docstring should
        be processed.  It is updated in place.
    @param docindex: A DocIndex, used to find the containing
        module (to look up the docformat); and to find any
        user docfields defined by containing objects.
    @param suppress_warnings: A set of objects for which docstring
        warnings should be suppressed.
    """
    # Metadata that is already set means this object was handled
    # before.  That is expected for __init__ routines (they are parsed
    # ahead of time from their class), so only log the other cases.
    if api_doc.metadata is not UNKNOWN:
        is_init_routine = (isinstance(api_doc, RoutineDoc) and
                           api_doc.canonical_name[-1] == '__init__')
        if not is_init_routine:
            log.debug("%s's docstring processed twice" %
                      api_doc.canonical_name)
        return

    initialize_api_doc(api_doc)

    parse_errors = []    # markup errors collected while parsing
    field_warnings = []  # problems found while processing fields

    def _add_metadata_from_variables():
        # Check for special variables (e.g., __version__); only
        # namespaces are inspected.
        if isinstance(api_doc, NamespaceDoc):
            for docfield in (STANDARD_FIELDS +
                             user_docfields(api_doc, docindex)):
                add_metadata_from_var(api_doc, docfield)

    def _apply_fields(target_doc, field_list):
        # Process each field; a ValueError becomes a field warning.
        for fld in field_list:
            try:
                process_field(target_doc, docindex, fld.tag(),
                              fld.arg(), fld.body())
            except ValueError:
                field_warnings.append(str(sys.exc_info()[1]))

    # Without a docstring, the special variables are the only
    # possible source of information.
    if api_doc.docstring in (None, UNKNOWN):
        _add_metadata_from_variables()
        return

    # Strip the leading indentation, then work out which markup
    # language applies.
    api_doc.docstring = unindent_docstring(api_doc.docstring)
    docformat = get_docformat(api_doc, docindex)

    # A signature given in the docstring overrides any signature we
    # got via introspection/parsing.
    if isinstance(api_doc, RoutineDoc):
        parse_function_signature(api_doc, None, docformat, parse_errors)

    # Parse the docstring (errors are stored as `ParseError` objects
    # in parse_errors) and split it into a description and fields.
    parsed_docstring = markup.parse(api_doc.docstring, docformat,
                                    parse_errors)
    api_doc.descr, fields = parsed_docstring.split_fields(parse_errors)

    # Handle the constructor fields that have been defined in the
    # class docstring.  This assumes that a class docstring is parsed
    # before the same class's __init__ docstring.
    if isinstance(api_doc, ClassDoc):
        initvar = api_doc.variables.get('__init__')
        if initvar and isinstance(initvar.value, RoutineDoc):
            init_api_doc = initvar.value
            # Parse ahead the __init__ docstring for this class.
            parse_docstring(init_api_doc, docindex, suppress_warnings)
            parse_function_signature(init_api_doc, api_doc,
                                     docformat, parse_errors)
            _apply_fields(init_api_doc,
                          split_init_fields(fields, field_warnings))

    _apply_fields(api_doc, fields)

    # Make sure that all type parameters correspond to some
    # documented parameter.
    check_type_fields(api_doc, field_warnings)

    _add_metadata_from_variables()

    # Extract a summary from the description.
    if api_doc.summary is None and api_doc.descr is not None:
        api_doc.summary, api_doc.other_docs = api_doc.descr.summary()

    # If the summary is still empty, but the return field is not,
    # then build the summary from the return description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None and
        api_doc.return_descr is not None):
        return_summary, return_other = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + return_summary
        api_doc.other_docs = return_other

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred, unless they are suppressed for
    # this object.
    if api_doc not in suppress_warnings:
        report_errors(api_doc, docindex, parse_errors, field_warnings)
    elif parse_errors or field_warnings:
        log.info("Suppressing docstring warnings for %s, since it "
                 "is not included in the documented set." %
                 api_doc.canonical_name)
286
def add_metadata_from_var(api_doc, field):
    """
    Fill in C{api_doc}'s metadata for C{field} from the value of a
    variable named in C{field.varnames} (e.g. C{__version__}).

    Nothing is added if C{api_doc.metadata} already holds a value for
    C{field}.  A variable that supplied metadata is removed from
    C{api_doc.variables} (and from C{api_doc.sort_spec}) unless it has
    its own docstring.

    @param api_doc: The namespace documentation object to update.
    @param field: The L{DocstringField} to look for.
    """
    for varname in field.varnames:
        # Check if api_doc has a variable w/ the given name.
        if varname not in api_doc.variables: continue

        # Check moved here from before the for loop because we expect to
        # reach rarely this point. The loop below is to be performed more than
        # once only for fields with more than one varname, which currently is
        # only 'author'.
        for md in api_doc.metadata:
            if field == md[0]:
                return # We already have a value for this metadata.

        var_doc = api_doc.variables[varname]
        if var_doc.value is UNKNOWN: continue
        val_doc = var_doc.value
        value = []

        # Try extracting the value from the pyval.
        ok_types = (basestring, int, float, bool, type(None))
        if val_doc.pyval is not UNKNOWN:
            if isinstance(val_doc.pyval, ok_types):
                value = [val_doc.pyval]
            elif field.multivalue:
                if isinstance(val_doc.pyval, (tuple, list)):
                    # Accept the sequence only if every element has
                    # an acceptable type.
                    for elt in val_doc.pyval:
                        if not isinstance(elt, ok_types): break
                    else:
                        value = list(val_doc.pyval)

        # Try extracting the value from the parse tree.
        elif val_doc.toktree is not UNKNOWN:
            try: value = [epydoc.docparser.parse_string(val_doc.toktree)]
            except KeyboardInterrupt: raise
            except: pass
            if field.multivalue and not value:
                # Best effort, like the attempt above: a parse tree
                # that is not a string list simply yields no value.
                # (This handler used to re-raise, which made the try
                # block pointless and let one unparsable variable
                # abort the whole run.)
                try: value = epydoc.docparser.parse_string_list(val_doc.toktree)
                except KeyboardInterrupt: raise
                except: pass

        # Add any values that we found.
        for elt in value:
            if isinstance(elt, str):
                elt = decode_with_backslashreplace(elt)
            else:
                elt = unicode(elt)
            elt = epytext.ParsedEpytextDocstring(
                epytext.parse_as_para(elt), inline=True)

            # Add in the metadata and remove from the variables
            api_doc.metadata.append( (field, varname, elt) )

        # Remove the variable itself (unless it's documented)
        if var_doc.docstring in (None, UNKNOWN):
            del api_doc.variables[varname]
            if api_doc.sort_spec is not UNKNOWN:
                try: api_doc.sort_spec.remove(varname)
                except ValueError: pass
345
def initialize_api_doc(api_doc):
    """A helper function for L{parse_docstring()} that initializes
    the attributes that C{parse_docstring()} will write to.  Only
    attributes whose current value is C{UNKNOWN} are touched."""
    def _fill(attr_names, make_default):
        # Replace each UNKNOWN attribute with a fresh default value.
        for attr_name in attr_names:
            if getattr(api_doc, attr_name) is UNKNOWN:
                setattr(api_doc, attr_name, make_default())

    no_value = lambda: None

    _fill(('descr', 'summary'), no_value)
    _fill(('metadata',), list)
    if isinstance(api_doc, RoutineDoc):
        _fill(('arg_descrs',), list)
        _fill(('arg_types',), dict)
        _fill(('return_descr', 'return_type'), no_value)
        _fill(('exception_descrs',), list)
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        _fill(('type_descr',), no_value)
    if isinstance(api_doc, NamespaceDoc):
        _fill(('group_specs', 'sort_spec'), list)
374
def split_init_fields(fields, warnings):
    """
    Remove the fields related to the constructor from a class docstring
    fields list.

    Fields without an argument stay in C{fields}.  Fields with an
    argument are grouped by argument: variable fields (and their
    type) go back to C{fields}; parameter fields (and their type) and
    exception fields go to the returned list; any other field goes
    back to C{fields}.

    @param fields: The fields to process. The list will be modified in place
    @type fields: C{list} of L{markup.Field}
    @param warnings: A list to emit processing warnings
    @type warnings: C{list}
    @return: The C{fields} items to be applied to the C{__init__} method
    @rtype: C{list} of L{markup.Field}
    """
    init_fields = []

    # Take every field that has an argument out of `fields`, grouping
    # them by argument and remembering the order of appearance.
    grouped = {}
    arg_sequence = []
    without_arg = []
    for fld in fields:
        key = fld.arg()
        if key is None:
            without_arg.append(fld)
        else:
            grouped.setdefault(key, []).append(fld)
            arg_sequence.append(key)
    fields[:] = without_arg

    # For each argument allow at most a single variable and a single
    # parameter, and at most a single type for each of them.
    for key in arg_sequence:
        group = grouped.pop(key, None)
        if group is None:
            # This argument's group was already handled.
            continue

        var_field = par_field = None
        var_type = par_type = None
        for fld in group:
            tag = fld.tag()
            if tag in VARIABLE_TAGS:
                if var_field is not None:
                    warnings.append(
                        "There is more than one variable named '%s'"
                        % key)
                else:
                    var_field = fld
                    fields.append(fld)
            elif tag in PARAMETER_TAGS:
                if par_field is not None:
                    warnings.append(
                        "There is more than one parameter named '%s'"
                        % key)
                else:
                    par_field = fld
                    init_fields.append(fld)
            elif tag == 'type':
                if var_field is None and par_field is None:
                    # A type seen before any object may describe either.
                    var_type = par_type = fld
                else:
                    if var_field is not None and var_type is None:
                        var_type = fld
                    if par_field is not None and par_type is None:
                        par_type = fld
            elif tag in EXCEPTION_TAGS:
                init_fields.append(fld)
            else:
                # Unexpected field: leave it with the class.
                fields.append(fld)

        # A type is kept only when its object was actually seen.
        # [xx] warn about type w/o object?
        if var_type is not None and var_field is not None:
            fields.append(var_type)
        if par_type is not None and par_field is not None:
            init_fields.append(par_type)

    return init_fields
459
def report_errors(api_doc, docindex, parse_errors, field_warnings):
    """A helper function for L{parse_docstring()} that reports any
    markup warnings and field warnings that we encountered while
    processing C{api_doc}'s docstring.

    The warnings are logged in a block whose header gives the file,
    the docstring's starting line (when known) and the object's name.
    Parse errors with the same description are combined.
    """
    if not (parse_errors or field_warnings):
        return

    # Get the name of the item containing the error, and the
    # filename of its containing module.
    name = api_doc.canonical_name
    module = api_doc.defining_module
    filename = '??'
    if module is not UNKNOWN and module.filename not in (None, UNKNOWN):
        try:
            filename = py_src_filename(module.filename)
        except:
            filename = module.filename

    # [xx] Don't report markup errors for standard builtins.
    # n.b. that we must use 'is' to compare pyvals here -- if we use
    # 'in' or '==', then a user __cmp__ method might raise an
    # exception, or lie.
    if isinstance(api_doc, ValueDoc) and api_doc != module:
        if module not in (None, UNKNOWN) and module.pyval is exceptions:
            return
        for builtin_val in __builtin__.__dict__.values():
            if builtin_val is api_doc.pyval:
                return

    # Get the start line of the docstring containing the error.
    startline = api_doc.docstring_lineno
    if startline in (None, UNKNOWN):
        startline = introspect_docstring_lineno(api_doc)
    if startline in (None, UNKNOWN):
        startline = None

    # Display a block header.
    header_parts = ['File %s, ' % filename]
    if startline is not None:
        header_parts.append('line %d, ' % startline)
    header_parts.append('in %s' % name)
    log.start_block(''.join(header_parts))

    # Display all parse errors, combining any errors that share the
    # same description message.
    if startline is None:
        # Line numbers can't be given: report each distinct message
        # once, keeping the original order.
        reported = {}
        for error in parse_errors:
            message = error.descr()
            if message in reported:
                continue
            log.docstring_warning(message)
            reported[message] = 1
    else:
        # Collect the line numbers for each distinct message.
        linenums_by_message = {}
        for error in parse_errors:
            error.set_linenum_offset(startline)
            linenums_by_message.setdefault(
                error.descr(), []).append(error.linenum())
        # Report the messages ordered by their earliest line number.
        ordered = sorted(linenums_by_message.items(),
                         key=lambda item: min(item[1]))
        for message, linenums in ordered:
            known = [n for n in linenums if n is not None]
            if not known:
                log.docstring_warning(message)
            elif len(known) == 1:
                log.docstring_warning("Line %s: %s" % (known[0], message))
            else:
                joined = ', '.join(['%s' % n for n in known])
                log.docstring_warning("Lines %s: %s" % (joined, message))

    # Display all field warnings.
    for warning in field_warnings:
        log.docstring_warning(warning)

    # End the message block.
    log.end_block()
537
RETURN_PDS = markup.parse('Returns:', markup='epytext')
"""A ParsedDocstring containing the text 'Returns:'.  This is used to
construct summary descriptions for routines that have empty C{descr},
but non-empty C{return_descr}."""
# Flag the first child of the parsed tree as inline.
RETURN_PDS._tree.children[0].attribs['inline'] = True
543
544######################################################################
545#{ Field Processing Error Messages
546######################################################################
547
# Message templates for field-processing problems.  Each one is
# filled in with the '%' operator.
UNEXPECTED_ARG      = '%r did not expect an argument'
EXPECTED_ARG        = '%r expected an argument'
EXPECTED_SINGLE_ARG = '%r expected a single argument'
BAD_CONTEXT         = 'Invalid context for %r'
REDEFINED           = 'Redefinition of %s'
UNKNOWN_TAG         = 'Unknown field tag %r'
BAD_PARAM           = '@%s for unknown parameter %s'  # (tag, parameter names)
555
556######################################################################
557#{ Field Processing
558######################################################################
559
def process_field(api_doc, docindex, tag, arg, descr):
    """
    Process a single field, and use it to update C{api_doc}.  If
    C{tag} is the name of a special field, then call its handler
    function.  Otherwise, if C{tag} belongs to a standard simple
    field, or to a user-defined field (defined in this docstring or
    the docstring of a containing object), then record the field in
    C{api_doc.metadata}.

    @param tag: The field's tag, such as C{'author'}
    @param arg: The field's optional argument
    @param descr: The description following the field tag and
        argument.
    @raise ValueError: If a problem was encountered while processing
        the field.  The C{ValueError}'s string argument is an
        explanation of the problem, which should be displayed as a
        warning message.
    """
    # Standard special fields are dispatched to their handler.
    try:
        handler = _field_dispatch_table[tag]
    except KeyError:
        pass
    else:
        handler(api_doc, docindex, tag, arg, descr)
        return

    # Standard simple fields & user-defined fields: the first field
    # that knows this tag wins.
    candidates = STANDARD_FIELDS + user_docfields(api_doc, docindex)
    matches = [docfield for docfield in candidates
               if tag in docfield.tags]

    # If nothing handles the field, then report a warning.
    if not matches:
        raise ValueError(UNKNOWN_TAG % tag)

    docfield = matches[0]
    # [xx] check if it's redefined if it's not multivalue??
    if not docfield.takes_arg:
        _check(api_doc, tag, arg, expect_arg=False)
    api_doc.metadata.append((docfield, arg, descr))
596
def user_docfields(api_doc, docindex):
    """
    Return a list of user defined fields that can be used for the
    given object.  This list is taken from the given C{api_doc}, and
    any of its containing C{NamespaceDoc}s, nearest container first.

    @note: We assume here that a parent's docstring will always be
        parsed before its childrens'.  This is indeed the case when we
        are called via L{docbuilder.build_doc_index()}.  If a child's
        docstring is parsed before its parents, then its parent won't
        yet have had its C{extra_docstring_fields} attribute
        initialized.
    """
    collected = []

    # Start with the docfields defined by `api_doc` itself.
    own_fields = api_doc.extra_docstring_fields
    if own_fields not in (None, UNKNOWN):
        collected.extend(own_fields)

    # Then walk up through ever shorter prefixes of the canonical
    # name, adding the docfields of each ancestor that can be found.
    prefix_len = len(api_doc.canonical_name) - 1
    while prefix_len > 0:
        ancestor = docindex.get_valdoc(api_doc.canonical_name[:prefix_len])
        if ancestor is not None:
            if ancestor.extra_docstring_fields not in (None, UNKNOWN):
                collected.extend(ancestor.extra_docstring_fields)
        prefix_len -= 1

    return collected
621
# Maps each special field tag to the handler function registered
# for it.
_field_dispatch_table = {}

def register_field_handler(handler, *field_tags):
    """
    Register the given field handler function for processing any
    of the given field tags.  A later registration for the same tag
    replaces the earlier one.  Field handler functions should
    have the following signature:

        >>> def field_handler(api_doc, docindex, tag, arg, descr):
        ...     '''update api_doc in response to the field.'''

    Where C{api_doc} is the documentation object to update;
    C{docindex} is a L{DocIndex} that can be used to look up the
    documentation for related objects; C{tag} is the field tag that
    was used; C{arg} is the optional argument; and C{descr} is the
    description following the field tag and argument.
    """
    _field_dispatch_table.update(dict.fromkeys(field_tags, handler))
640
641######################################################################
642#{ Field Handler Functions
643######################################################################
644
def process_summary_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} in C{api_doc.summary}.

    @raise ValueError: If C{api_doc} already has a summary (or if
        L{_check} rejects the field).
    """
    _check(api_doc, tag, arg, expect_arg=False)
    if api_doc.summary is None:
        api_doc.summary = descr
    else:
        raise ValueError(REDEFINED % tag)
651
def process_include_field(api_doc, docindex, tag, arg, descr):
    """Copy the docstring contents from the object named in C{descr}.

    Not implemented yet: after the argument check, this handler
    always raises C{ValueError}.
    """
    _check(api_doc, tag, arg, expect_arg=False)
    # Open design questions.  Options:
    #   a. just append the descr to our own
    #   b. append descr and update metadata
    #   c. append descr and process all fields.
    # In any case, mark any errors we may find as coming from an
    # imported docstring.
    #
    # How does this interact with documentation inheritance??
    message = '%s not implemented yet' % tag
    raise ValueError(message)
664
def process_undocumented_field(api_doc, docindex, tag, arg, descr):
    """Remove any documentation for the variables named in C{descr}.

    Each identifier may contain C{*} wildcards.  For a C{ModuleDoc},
    matching submodules are removed as well, together with every
    entry of C{docindex.root} that they dominate.
    """
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    for ident in _descr_to_identifiers(descr):
        # A '*' in the identifier matches any run of characters.
        pattern = re.compile('^%s$' % ident.replace('*', '(.*)'))

        # Remove the matching variables from `variables` (and from
        # the sort spec, if there is one).
        doomed = [var_name for var_name in api_doc.variables
                  if pattern.match(var_name)]
        for var_name in doomed:
            api_doc.variables.pop(var_name, None)
            if api_doc.sort_spec is not UNKNOWN:
                try:
                    api_doc.sort_spec.remove(var_name)
                except ValueError:
                    pass

        # For modules, remove any submodules that match the pattern.
        if not isinstance(api_doc, ModuleDoc):
            continue
        removed = set([m for m in api_doc.submodules
                       if pattern.match(m.canonical_name[-1])])
        if not removed:
            continue

        # Remove the indicated submodules from this module.
        api_doc.submodules = [m for m in api_doc.submodules
                              if m not in removed]

        # Remove everything the indicated submodules dominate from
        # the docindex root.  E.g., if module x declares y to be
        # undocumented, then x.y.z should also be undocumented.
        for elt in docindex.root[:]:
            for m in removed:
                if m.canonical_name.dominates(elt.canonical_name):
                    docindex.root.remove(elt)
693
def process_group_field(api_doc, docindex, tag, arg, descr):
    """Define a group named C{arg} containing the variables whose
    names are listed in C{descr}.  The group is appended to
    C{api_doc.group_specs} as a C{(name, identifiers)} pair."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=True)
    members = _descr_to_identifiers(descr)
    api_doc.group_specs.append((arg, members))
    # [xx] should this also set sort order?
700
def process_deffield_field(api_doc, docindex, tag, arg, descr):
    """Define a new custom field.

    The field built from C{arg} and C{descr} is appended to
    C{api_doc.extra_docstring_fields}; the variable name
    C{__<arg>__} is added to the field's C{varnames}.

    @raise ValueError: If the field can not be built; the message is
        prefixed with C{'Bad <tag>: '}.
    """
    _check(api_doc, tag, arg, expect_arg=True)
    if api_doc.extra_docstring_fields is UNKNOWN:
        api_doc.extra_docstring_fields = []
    try:
        new_field = _descr_to_docstring_field(arg, descr)
        new_field.varnames.append("__%s__" % arg)
        api_doc.extra_docstring_fields.append(new_field)
    except ValueError:
        # Re-raise with the tag prepended to the original message.
        raise ValueError('Bad %s: %s' % (tag, sys.exc_info()[1]))
712
def process_raise_field(api_doc, docindex, tag, arg, descr):
    """Record the fact that C{api_doc} can raise the exception named
    C{arg} in C{api_doc.exception_descrs}.  The name is stored as a
    C{DottedName} when C{arg} is a valid dotted name, and as the raw
    argument otherwise."""
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg='single')
    exc_name = arg
    try:
        exc_name = DottedName(arg, strict=True)
    except DottedName.InvalidDottedName:
        pass
    api_doc.exception_descrs.append((exc_name, descr))
720
def process_sort_field(api_doc, docindex, tag, arg, descr):
    """Put the identifiers listed in C{descr} at the front of
    C{api_doc.sort_spec}."""
    _check(api_doc, tag, arg, context=NamespaceDoc, expect_arg=False)
    leading = _descr_to_identifiers(descr)
    api_doc.sort_spec = leading + api_doc.sort_spec
724
725# [xx] should I notice when they give a type for an unknown var?
def process_type_field(api_doc, docindex, tag, arg, descr):
    """Record the type description C{descr}.  What it describes
    depends on the kind of C{api_doc}.

    @raise ValueError: If the type is redefined, or if C{api_doc} is
        not a namespace, variable, property or routine.
    """
    # In namespace, "@type var: ..." describes the type of a var.
    if isinstance(api_doc, NamespaceDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        set_var_type(api_doc, arg, descr)
        return

    # For variables & properties, "@type: ..." describes the variable.
    if isinstance(api_doc, (VariableDoc, PropertyDoc)):
        _check(api_doc, tag, arg, expect_arg=False)
        if api_doc.type_descr is not None:
            raise ValueError(REDEFINED % tag)
        api_doc.type_descr = descr
        return

    # For routines, "@type param: ..." describes a parameter.
    if isinstance(api_doc, RoutineDoc):
        _check(api_doc, tag, arg, expect_arg='single')
        if arg in api_doc.arg_types:
            raise ValueError(REDEFINED % ('type for '+arg))
        api_doc.arg_types[arg] = descr
        return

    raise ValueError(BAD_CONTEXT % tag)
748
def process_var_field(api_doc, docindex, tag, arg, descr):
    """Use C{descr} as the description of each module variable named
    in C{arg}."""
    _check(api_doc, tag, arg, context=ModuleDoc, expect_arg=True)
    # `arg` may list several names separated by ':', ';', ',' or
    # spaces.
    var_names = re.split('[:;, ] *', arg)
    for var_name in var_names:
        set_var_descr(api_doc, var_name, descr)
753
def process_cvar_field(api_doc, docindex, tag, arg, descr):
    """Describe one or more class variables.

    Used within a variable contained in a class, C{descr} describes
    that variable.  Otherwise the field must appear in a class, with
    C{arg} naming the variable(s) that are described.
    """
    used_within_variable = (isinstance(api_doc, VariableDoc) and
                            isinstance(api_doc.container, ClassDoc))

    # Otherwise, @cvar should be used in a class.
    if not used_within_variable:
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = False
        return

    # If @cvar is used *within* a variable, then use it as the
    # variable's description, and treat the variable as a class var.
    _check(api_doc, tag, arg, expect_arg=False)
    api_doc.is_instvar = False
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    api_doc.summary, api_doc.other_docs = descr.summary()
770
def process_ivar_field(api_doc, docindex, tag, arg, descr):
    """Describe one or more instance variables.

    Used within a variable contained in a class, C{descr} describes
    that variable.  Otherwise the field must appear in a class, with
    C{arg} naming the variable(s) that are described.
    """
    used_within_variable = (isinstance(api_doc, VariableDoc) and
                            isinstance(api_doc.container, ClassDoc))

    # Otherwise, @ivar should be used in a class.
    if not used_within_variable:
        _check(api_doc, tag, arg, context=ClassDoc, expect_arg=True)
        for var_name in re.split('[:;, ] *', arg):
            set_var_descr(api_doc, var_name, descr)
            api_doc.variables[var_name].is_instvar = True
        return

    # If @ivar is used *within* a variable, then use it as the
    # variable's description, and treat the variable as an instvar.
    _check(api_doc, tag, arg, expect_arg=False)
    # require that there be no other descr?
    api_doc.is_instvar = True
    api_doc.descr = markup.ConcatenatedDocstring(api_doc.descr, descr)
    api_doc.summary, api_doc.other_docs = descr.summary()
788
789# [xx] '@return: foo' used to get used as a descr if no other
790# descr was present. is that still true?
def process_return_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} as the description of the routine's return
    value.

    @raise ValueError: If a return description was already given.
    """
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=False)
    if api_doc.return_descr is None:
        api_doc.return_descr = descr
    else:
        raise ValueError(REDEFINED % 'return value description')
796
def process_rtype_field(api_doc, docindex, tag, arg, descr):
    """Store C{descr} as the return type of a routine, or as the type
    description of a property.

    @raise ValueError: If the type was already given.
    """
    _check(api_doc, tag, arg,
           context=(RoutineDoc, PropertyDoc), expect_arg=False)

    if isinstance(api_doc, RoutineDoc):
        if api_doc.return_type is not None:
            raise ValueError(REDEFINED % 'return value type')
        api_doc.return_type = descr
        return

    if isinstance(api_doc, PropertyDoc):
        _check(api_doc, tag, arg, expect_arg=False)
        if api_doc.type_descr is not None:
            raise ValueError(REDEFINED % tag)
        api_doc.type_descr = descr
810
def process_arg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the parameter(s) named
    in C{arg}.

    @raise ValueError: If a named parameter is not part of the
        routine's signature.  The description is recorded before
        this check is made.
    """
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    param_names = re.split('[:;, ] *', arg)
    api_doc.arg_descrs.append((param_names, descr))

    # Check to make sure that the documented parameter(s) are
    # actually part of the function signature.  There is nothing to
    # check against when the signature is ['...'] or UNKNOWN.
    signature_args = api_doc.all_args()
    if signature_args in (['...'], UNKNOWN):
        return
    unknown_params = ['"%s"' % param_name for param_name in param_names
                      if param_name not in signature_args]
    if unknown_params:
        raise ValueError(BAD_PARAM % (tag, ', '.join(unknown_params)))
822
def process_kwarg_field(api_doc, docindex, tag, arg, descr):
    """Record C{descr} as the description of the keyword
    parameter(s) named in C{arg}.  The names are not checked against
    the routine's signature."""
    # [xx] these should -not- be checked if they exist..
    # and listed separately or not??
    _check(api_doc, tag, arg, context=RoutineDoc, expect_arg=True)
    keyword_names = re.split('[:;, ] *', arg)
    api_doc.arg_descrs.append((keyword_names, descr))
829
# Register the handler for every standard special field tag.
for _handler, _tags in (
    (process_group_field,        ('group',)),
    (process_deffield_field,     ('deffield', 'newfield')),
    (process_sort_field,         ('sort',)),
    (process_summary_field,      ('summary',)),
    (process_undocumented_field, ('undocumented',)),
    (process_include_field,      ('include',)),
    (process_var_field,          ('var', 'variable')),
    (process_type_field,         ('type',)),
    (process_cvar_field,         ('cvar', 'cvariable')),
    (process_ivar_field,         ('ivar', 'ivariable')),
    (process_return_field,       ('return', 'returns')),
    (process_rtype_field,        ('rtype', 'returntype')),
    (process_arg_field,          ('arg', 'argument',
                                  'parameter', 'param')),
    (process_kwarg_field,        ('kwarg', 'keyword', 'kwparam')),
    (process_raise_field,        ('raise', 'raises',
                                  'except', 'exception')),
    ):
    register_field_handler(_handler, *_tags)
# Don't leave the loop variables behind in the module namespace.
del _handler, _tags
847
# Tags related to function parameters: the positional-parameter
# aliases followed by the keyword-parameter aliases.
PARAMETER_TAGS = (
    ('arg', 'argument', 'parameter', 'param') +
    ('kwarg', 'keyword', 'kwparam')
)

# Tags related to variables in a class: class-variable aliases
# followed by instance-variable aliases.
VARIABLE_TAGS = (
    'cvar', 'cvariable',
    'ivar', 'ivariable',
)

# Tags related to exceptions
EXCEPTION_TAGS = (
    'raise', 'raises',
    'except', 'exception',
)
857
858######################################################################
859#{ Helper Functions
860######################################################################
861
def check_type_fields(api_doc, field_warnings):
    """
    Check that every parameter type recorded for a routine belongs
    to a known parameter.

    A typed name is accepted if it is in the routine's signature
    (C{api_doc.all_args()}) or is named by some entry of
    C{api_doc.arg_descrs}.  For every other typed name, a warning
    is appended to C{field_warnings}.  Objects that are not
    L{RoutineDoc}s are ignored.

    @param api_doc: The documented object to check.
    @param field_warnings: A list that warning strings are appended to.
    """
    if not isinstance(api_doc, RoutineDoc):
        return

    for typed_name in api_doc.arg_types:
        if typed_name in api_doc.all_args():
            continue
        # Not in the signature: look for a description that names it.
        for described_names, _descr in api_doc.arg_descrs:
            if typed_name in described_names:
                break
        else:
            field_warnings.append(
                BAD_PARAM % ('type', '"%s"' % typed_name))
873
def set_var_descr(api_doc, ident, descr):
    """
    Set the description of the variable named C{ident} in
    C{api_doc}.  A new L{VariableDoc} is created and stored in
    C{api_doc.variables} if there is no entry under that name.  If
    the variable has no summary yet, its summary and C{other_docs}
    are filled in from the description's C{summary()}.

    @param api_doc: The object that contains the variable.
    @param ident: The variable's name.
    @param descr: The description to store.
    @raise ValueError: If the variable already has a description.
    """
    if ident in api_doc.variables:
        var_doc = api_doc.variables[ident]
    else:
        var_doc = VariableDoc(
            container=api_doc, name=ident,
            canonical_name=api_doc.canonical_name+ident)
        api_doc.variables[ident] = var_doc

    if var_doc.descr not in (None, UNKNOWN):
        raise ValueError(REDEFINED % ('description for '+ident))
    var_doc.descr = descr

    if var_doc.summary in (None, UNKNOWN):
        summary, other_docs = var_doc.descr.summary()
        var_doc.summary = summary
        var_doc.other_docs = other_docs
886
def set_var_type(api_doc, ident, descr):
    """
    Set the type description of the variable named C{ident} in
    C{api_doc}.  A new L{VariableDoc} is created and stored in
    C{api_doc.variables} if there is no entry under that name.

    @param api_doc: The object that contains the variable.
    @param ident: The variable's name.
    @param descr: The type description to store.
    @raise ValueError: If the variable already has a type description.
    """
    if ident in api_doc.variables:
        var_doc = api_doc.variables[ident]
    else:
        var_doc = VariableDoc(
            container=api_doc, name=ident,
            canonical_name=api_doc.canonical_name+ident)
        api_doc.variables[ident] = var_doc

    if var_doc.type_descr not in (None, UNKNOWN):
        raise ValueError(REDEFINED % ('type for '+ident))
    var_doc.type_descr = descr
897
898def _check(api_doc, tag, arg, context=None, expect_arg=None):
899 if context is not None:
900 if not isinstance(api_doc, context):
901 raise ValueError(BAD_CONTEXT % tag)
902 if expect_arg is not None:
903 if expect_arg == True:
904 if arg is None:
905 raise ValueError(EXPECTED_ARG % tag)
906 elif expect_arg == False:
907 if arg is not None:
908 raise ValueError(UNEXPECTED_ARG % tag)
909 elif expect_arg == 'single':
910 if (arg is None or ' ' in arg):
911 raise ValueError(EXPECTED_SINGLE_ARG % tag)
912 else:
913 assert 0, 'bad value for expect_arg'
914
def get_docformat(api_doc, docindex):
    """
    Return the name of the markup language that should be used to
    parse the API documentation for the given object.

    The name is taken from the C{docformat} of the module that
    defines C{api_doc}; L{DEFAULT_DOCFORMAT} is used if that module
    or its docformat is unknown.  The result is lower-cased and cut
    at the first whitespace, so C{'epytext en'} gives C{'epytext'}.

    @param api_doc: The object whose docformat should be found.
    @param docindex: Not used by this function.
    @return: The docformat name, or L{DEFAULT_DOCFORMAT} if the
        module's docformat cannot be normalized.
    """
    # Find the module that defines api_doc.
    module = api_doc.defining_module
    # Look up its docformat.
    if module is not UNKNOWN and module.docformat not in (None, UNKNOWN):
        docformat = module.docformat
    else:
        docformat = DEFAULT_DOCFORMAT
    # Convert to lower case & strip region codes.
    try:
        return docformat.lower().split()[0]
    except Exception:
        # E.g. a non-string docformat, or one that is empty or all
        # whitespace.  Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return DEFAULT_DOCFORMAT
930
def unindent_docstring(docstring):
    """
    Remove the common leading indentation from a docstring.

    Tabs are expanded first.  The first line is stripped of all
    leading whitespace; every later line loses the smallest
    indentation found on any non-blank later line.  Trailing empty
    lines are dropped, but leading ones are kept.

    @param docstring: The raw docstring; may be C{None} or empty.
    @return: The unindented docstring, or C{''} if C{docstring} is
        C{None} or empty.
    """
    # [xx] adapted from inspect.getdoc(); we can't use inspect.getdoc()
    # itself, since it expects an object, not a string.
    if not docstring:
        return ''
    lines = docstring.expandtabs().split('\n')

    # Find minimum indentation of any non-blank lines after first line.
    # None means "no such line"; this avoids sys.maxint, which does
    # not exist on Python 3.
    margin = None
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indent = len(line) - len(stripped)
            if margin is None or indent < margin:
                margin = indent

    # Remove indentation.  split() always yields at least one line.
    lines[0] = lines[0].lstrip()
    if margin is not None:
        lines[1:] = [line[margin:] for line in lines[1:]]

    # Remove any trailing (but not leading!) blank lines.
    while lines and not lines[-1]:
        lines.pop()
    return '\n'.join(lines)
956
957_IDENTIFIER_LIST_REGEXP = re.compile(r'^[\w.\*]+([\s,:;]\s*[\w.\*]+)*$')
958def _descr_to_identifiers(descr):
959 """
960 Given a C{ParsedDocstring} that contains a list of identifiers,
961 return a list of those identifiers. This is used by fields such
962 as C{@group} and C{@sort}, which expect lists of identifiers as
963 their values. To extract the identifiers, the docstring is first
964 converted to plaintext, and then split. The plaintext content of
965 the docstring must be a a list of identifiers, separated by
966 spaces, commas, colons, or semicolons.
967
968 @rtype: C{list} of C{string}
969 @return: A list of the identifier names contained in C{descr}.
970 @type descr: L{markup.ParsedDocstring}
971 @param descr: A C{ParsedDocstring} containing a list of
972 identifiers.
973 @raise ValueError: If C{descr} does not contain a valid list of
974 identifiers.
975 """
976 idents = descr.to_plaintext(None).strip()
977 idents = re.sub(r'\s+', ' ', idents)
978 if not _IDENTIFIER_LIST_REGEXP.match(idents):
979 raise ValueError, 'Bad Identifier list: %r' % idents
980 rval = re.split('[:;, ] *', idents)
981 return rval
982
983def _descr_to_docstring_field(arg, descr):
984 tags = [s.lower() for s in re.split('[:;, ] *', arg)]
985 descr = descr.to_plaintext(None).strip()
986 args = re.split('[:;,] *', descr)
987 if len(args) == 0 or len(args) > 3:
988 raise ValueError, 'Wrong number of arguments'
989 singular = args[0]
990 if len(args) >= 2: plural = args[1]
991 else: plural = None
992 short = 0
993 if len(args) >= 3:
994 if args[2] == 'short': short = 1
995 else: raise ValueError('Bad arg 2 (expected "short")')
996 return DocstringField(tags, singular, plural, short)
997
998######################################################################
999#{ Function Signature Extraction
1000######################################################################
1001
1002# [XX] todo: add optional type modifiers?
1003_SIGNATURE_RE = re.compile(
1004 # Class name (for builtin methods)
1005 r'^\s*((?P<self>\w+)\.)?' +
1006 # The function name (must match exactly) [XX] not anymore!
1007 r'(?P<func>\w+)' +
1008 # The parameters
1009 r'\((?P<params>(\s*\[?\s*\*{0,2}[\w\-\.]+(\s*=.+?)?'+
1010 r'(\s*\[?\s*,\s*\]?\s*\*{0,2}[\w\-\.]+(\s*=.+?)?)*\]*)?)\s*\)' +
1011 # The return value (optional)
1012 r'(\s*(->)\s*(?P<return>\S.*?))?'+
1013 # The end marker
1014 r'\s*(\n|\s+(--|<=+>)\s+|$|\.\s+|\.\n)')
1015"""A regular expression that is used to extract signatures from
1016docstrings."""
1017
def parse_function_signature(func_doc, doc_source, docformat, parse_errors):
    """
    Construct the signature for a builtin function or method from
    its docstring.  If the docstring uses the standard convention
    of including a signature in the first line of the docstring
    (and formats that signature according to standard
    conventions), then it will be used to extract a signature.
    Otherwise nothing is extracted and C{False} is returned.

    On success, C{func_doc}'s C{posargs}, C{posarg_defaults},
    C{vararg} and C{kwarg} are updated; C{return_type} is set if the
    signature has a C{->} part; and the signature text is removed
    from the start of C{doc_source.docstring}.

    Note that if the parameter list contains unbalanced square
    brackets, C{False} is returned I{after} C{posargs}, C{vararg}
    and C{kwarg} have already been reset.

    @param func_doc: The target object where to store parsed signature. Also
        container of the docstring to parse if doc_source is C{None}
    @type func_doc: L{RoutineDoc}
    @param doc_source: Contains the docstring to parse. If C{None}, parse
        L{func_doc} docstring instead
    @type doc_source: L{APIDoc}
    @param docformat: The markup language name handed to
        L{markup.parse} when parsing the return type.
    @param parse_errors: Handed to L{markup.parse} when parsing the
        return type.
    @rtype: C{bool}
    @return: C{True} if a signature was found and extracted,
        C{False} otherwise.
    """
    if doc_source is None:
        doc_source = func_doc

    # If there's no docstring, then don't do anything.
    if not doc_source.docstring:
        return False

    m = _SIGNATURE_RE.match(doc_source.docstring)
    if m is None:
        return False

    # The name in the signature is deliberately not required to match
    # func_doc's own name: when the signature comes from a class
    # docstring, it names the class rather than __init__.

    params = m.group('params')
    rtype = m.group('return')
    selfparam = m.group('self')

    # Extract the parameters from the signature.
    func_doc.posargs = []
    func_doc.vararg = None
    func_doc.kwarg = None
    if func_doc.posarg_defaults is UNKNOWN:
        func_doc.posarg_defaults = []
    if params:
        # Figure out which parameters are optional: each "[...]" group
        # is unwrapped, and every name inside it is given the
        # placeholder default "=...".
        while '[' in params or ']' in params:
            m2 = re.match(r'(.*)\[([^\[\]]+)\](.*)', params)
            if not m2:
                return False
            (start, mid, end) = m2.groups()
            mid = re.sub(r'((,|^)\s*[\w\-\.]+)', r'\1=...', mid)
            params = start+mid+end

        # A name that already had a real default now reads
        # "name=...=value"; drop the placeholder.  The dots must be
        # escaped: an unescaped r'=...=' also matches e.g. "=1,b=" in
        # "a=1,b=2", which silently dropped a parameter.
        params = re.sub(r'=\.\.\.=', r'=', params)
        for name in params.split(','):
            if '=' in name:
                (name, default_repr) = name.split('=', 1)
                default = GenericValueDoc(parse_repr=default_repr)
            else:
                default = None
            name = name.strip()
            if name == '...':
                func_doc.vararg = '...'
            elif name.startswith('**'):
                func_doc.kwarg = name[2:]
            elif name.startswith('*'):
                func_doc.vararg = name[1:]
            else:
                func_doc.posargs.append(name)
                # Extend the defaults list if it is too short;
                # otherwise only overwrite an existing entry when the
                # signature actually supplies a default.
                if len(func_doc.posarg_defaults) < len(func_doc.posargs):
                    func_doc.posarg_defaults.append(default)
                elif default is not None:
                    argnum = len(func_doc.posargs)-1
                    func_doc.posarg_defaults[argnum] = default

    # Extract the return type/value from the signature
    if rtype:
        func_doc.return_type = markup.parse(rtype, docformat, parse_errors,
                                            inline=True)

    # Add the self parameter, if it was specified.
    if selfparam:
        func_doc.posargs.insert(0, selfparam)
        func_doc.posarg_defaults.insert(0, None)

    # Remove the signature from the docstring.
    doc_source.docstring = doc_source.docstring[m.end():]

    # We found a signature.
    return True
1111