| """Simple code to extract class & function docstrings from a module. |
| |
| This code is used as an example in the library reference manual in the |
| section on using the parser module. Refer to the manual for a thorough |
| discussion of the operation of this code. |
| """ |
| |
| import os |
| import parser |
| import symbol |
| import token |
| import types |
| |
| from types import ListType, TupleType |
| |
| |
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree.  The module name passed to ModuleInfo is the base name of
    `fileName' with its extension removed.
    """
    # Close the file explicitly instead of leaving the handle to be
    # reclaimed whenever the file object happens to be collected.
    source_file = open(fileName)
    try:
        source = source_file.read()
    finally:
        source_file.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    syntax_tree = parser.suite(source)
    return ModuleInfo(syntax_tree.totuple(), basename)
| |
| |
class SuiteInfoBase:
    """Docstring, class and function information gathered from one suite.

    tree
        Parse tree (nested tuples) to examine.  When it is omitted or
        empty, no extraction is performed and the instance only holds
        the empty defaults.

    Nested definitions are stored by name: classes as ClassInfo objects
    and functions as FunctionInfo objects.
    """
    # Class-level defaults, used when nothing was extracted.
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the nested definitions from `tree'."""
        # extract docstring
        if len(tree) == 2:
            # Two-element tree: the single child is compared with the
            # simple_stmt part of the docstring pattern.
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Otherwise the fourth element is the docstring candidate
            # (presumably the first statement after the NEWLINE and
            # INDENT tokens of an indented suite -- confirm against the
            # grammar).
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have a fourth element; indexing tree[3] here
            # would raise IndexError, so report "no docstring" instead.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() is applied to the text captured from a
            # token.STRING node, i.e. string-literal source text.  Do not
            # feed this method trees that did not come from the parser.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if not found:
                continue
            cstmt = bindings['compound']
            if cstmt[0] == symbol.funcdef:
                name = cstmt[2][1]
                self._function_info[name] = FunctionInfo(cstmt)
            elif cstmt[0] == symbol.classdef:
                name = cstmt[2][1]
                self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the extracted docstring ('' when none was found)."""
        return self._docstring

    def get_name(self):
        """Return the recorded name ('' unless a subclass set one)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes found in this suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object stored for class `name'.

        Raises KeyError when no class of that name was recorded.
        """
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        Raises KeyError when neither table contains `name'.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
| |
| |
class SuiteFuncInfo:
    # Mixin exposing the function table of the class it is combined with.
    # It reads self._function_info, which the host class must provide.

    def get_function_names(self):
        """Return the names of the recorded functions."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info object recorded for function `name'.

        Raises KeyError when no function of that name was recorded.
        """
        functions = self._function_info
        return functions[name]
| |
| |
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a single function definition.

    tree
        Parse tree of the function definition.  The name is read from
        tree[2][1] and the last element of the tree is handed to
        SuiteInfoBase as the suite to examine.  When it is omitted or
        empty, the instance keeps the empty defaults.
    """
    def __init__(self, tree = None):
        if tree:
            self._name = tree[2][1]
            suite = tree[-1]
        else:
            # The advertised default (None) used to fail on tree[2][1];
            # leave the class-level default name in place instead.
            suite = None
        SuiteInfoBase.__init__(self, suite)
| |
| |
class ClassInfo(SuiteInfoBase):
    """Information about a single class definition.

    tree
        Parse tree of the class definition.  The name is read from
        tree[2][1] and the last element of the tree is handed to
        SuiteInfoBase as the suite to examine.  When it is omitted or
        empty, the instance keeps the empty defaults.

    Functions found directly in the class suite are exposed as methods.
    """
    def __init__(self, tree = None):
        if tree:
            self._name = tree[2][1]
            suite = tree[-1]
        else:
            # The advertised default (None) used to fail on tree[2][1];
            # leave the class-level default name in place instead.
            suite = None
        SuiteInfoBase.__init__(self, suite)

    def get_method_names(self):
        """Return the names of the functions recorded for this class."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object recorded for method `name'.

        Raises KeyError when no method of that name was recorded.
        """
        return self._function_info[name]
| |
| |
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module.

    tree
        Parse tree of the module in tuple form.  When it is omitted or
        empty, the instance keeps the empty defaults.

    name
        Name recorded for the module; defaults to "<string>".
    """
    def __init__(self, tree = None, name = "<string>"):
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree and len(tree) > 1:
            # Only the first element after the node type can provide the
            # module docstring.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the literal, as SuiteInfoBase does for classes
                # and functions, so the stored docstring is the string
                # value rather than the quoted source text.
                # NOTE(review): eval() runs on text captured from a
                # token.STRING node; only use parser-produced trees.
                self._docstring = eval(bindings["docstring"])
            else:
                # SuiteInfoBase looks at tree[3] for trees that do not
                # have exactly two elements; in a module tree that is a
                # later element, not the first statement, so discard
                # whatever it may have picked up there.
                self._docstring = ''
| |
| |
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  A dictionary that is
        passed in is updated in place.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is stored in
    the dictionary under 'varname'.  'varname' is not required to be a
    string object, but using strings makes patterns and the code which
    uses them more readable.

    A tuple pattern matches data of the same length whose elements match
    the pattern's elements pairwise; any other pattern value matches data
    that compares equal to it.

    This function returns two values: a true/false flag indicating whether
    a match was found and the dictionary mapping variable names to their
    associated values.  When the match fails, the dictionary may still
    hold the variables that were bound before the mismatch was found.
    """
    if vars is None:
        vars = {}
    if type(pattern) is list:       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if type(pattern) is not tuple:
        return (pattern == data), vars
    if len(data) != len(pattern):
        return 0, vars
    # An empty tuple pattern matches empty data; without this initial
    # value the loop below would never bind `same'.
    same = 1
    # The lengths are known to be equal here, so zip() pairs every element.
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
| |
| |
# Pattern for a 'stmt' node whose only child is a 'compound_stmt' node,
# which tells compound statements apart from simple ones.  The single
# child of the compound_stmt node is captured as the variable 'compound'.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
| |
| |
# Pattern for a 'stmt' node that *might* be a docstring: a simple statement
# made of one expression which reduces, one single-child node per level,
# to a lone STRING token, followed by a NEWLINE token.  The text of the
# STRING token is captured as the variable 'docstring'.
#
# A docstring must also be the first statement of its class or function;
# the pattern cannot check that, so it is left to the code that uses it.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, ''),
     ),
    )