| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 1 | """Simple code to extract class & function docstrings from a module. | 
 | 2 |  | 
| Guido van Rossum | 8206fb9 | 1996-08-26 00:33:29 +0000 | [diff] [blame] | 3 | This code is used as an example in the library reference manual in the | 
 | 4 | section on using the parser module.  Refer to the manual for a thorough | 
 | 5 | discussion of the operation of this code. | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 6 | """ | 
 | 7 |  | 
| Fred Drake | 995285e | 1999-11-19 21:57:56 +0000 | [diff] [blame] | 8 | import os | 
 | 9 | import parser | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 10 | import symbol | 
 | 11 | import token | 
 | 12 | import types | 
 | 13 |  | 
| Fred Drake | 995285e | 1999-11-19 21:57:56 +0000 | [diff] [blame] | 14 | from types import ListType, TupleType | 
 | 15 |  | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 16 |  | 
 | 17 | def get_docs(fileName): | 
 | 18 |     """Retrieve information from the parse tree of a source file. | 
 | 19 |  | 
 | 20 |     fileName | 
| Guido van Rossum | 4117e54 | 1998-09-14 16:44:15 +0000 | [diff] [blame] | 21 |         Name of the file to read Python source code from. | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 22 |     """ | 
 | 23 |     source = open(fileName).read() | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 24 |     basename = os.path.basename(os.path.splitext(fileName)[0]) | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 25 |     ast = parser.suite(source) | 
| Fred Drake | 995285e | 1999-11-19 21:57:56 +0000 | [diff] [blame] | 26 |     return ModuleInfo(ast.totuple(), basename) | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 27 |  | 
 | 28 |  | 
class SuiteInfoBase:
    """Base class holding what was discovered in one body of code.

    An instance records a docstring, a name, and two dictionaries mapping
    the names of directly nested classes and functions to ClassInfo and
    FunctionInfo objects.  Subclasses are expected to set `_name'.
    """
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Set up empty tables and, if `tree' is given, scan it.

        tree
            Parse tree (nested tuples) to examine.  A false value leaves
            the object empty.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the nested class/function tables."""
        # extract docstring
        if len(tree) == 2:
            # Node type plus a single child: the child is compared with
            # the simple_stmt part of the pattern (presumably the one-line
            # suite form -- confirm against the grammar).
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have an element at index 3; report "no
            # docstring" instead of raising IndexError.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() is applied to text captured from a
            # token.STRING node, presumably always a string literal.
            # Confirm before feeding trees that did not come from the
            # parser.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if not found:
                continue
            cstmt = bindings['compound']
            if cstmt[0] == symbol.funcdef:
                name = cstmt[2][1]
                self._function_info[name] = FunctionInfo(cstmt)
            elif cstmt[0] == symbol.classdef:
                name = cstmt[2][1]
                self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the docstring, or '' if none was found."""
        return self._docstring

    def get_name(self):
        """Return the recorded name ('' unless a subclass set one)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes found directly in this body."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name'; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        KeyError is raised if neither table has an entry for `name'.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 76 |  | 
 | 77 |  | 
class SuiteFuncInfo:
    #  Mixin class: gives access, by name, to the entries of the
    #  `_function_info' dictionary of the object it is mixed into.

    def get_function_names(self):
        """Return the keys of the function table."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the entry stored for `name'; KeyError if there is none."""
        functions = self._function_info
        return functions[name]
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 86 |  | 
 | 87 |  | 
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition.

    The name is taken from element [2][1] of the definition node and the
    last element of the node is scanned as the function's body.
    """

    def __init__(self, tree = None):
        """Record the name and scan the body of the definition `tree'.

        tree
            Parse tree of the function definition.  If omitted (or
            otherwise false) the object is left empty and its name stays
            ''; previously the default value raised TypeError.
        """
        if tree:
            self._name = tree[2][1]
            # The base class ignores a false body, exactly as the former
            # `tree and tree[-1] or None' expression arranged.
            SuiteInfoBase.__init__(self, tree[-1])
        else:
            SuiteInfoBase.__init__(self)
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 92 |  | 
 | 93 |  | 
 | 94 | class ClassInfo(SuiteInfoBase): | 
| Guido van Rossum | 8206fb9 | 1996-08-26 00:33:29 +0000 | [diff] [blame] | 95 |     def __init__(self, tree = None): | 
| Guido van Rossum | 4117e54 | 1998-09-14 16:44:15 +0000 | [diff] [blame] | 96 |         self._name = tree[2][1] | 
 | 97 |         SuiteInfoBase.__init__(self, tree and tree[-1] or None) | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 98 |  | 
 | 99 |     def get_method_names(self): | 
| Guido van Rossum | 4117e54 | 1998-09-14 16:44:15 +0000 | [diff] [blame] | 100 |         return self._function_info.keys() | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 101 |  | 
 | 102 |     def get_method_info(self, name): | 
| Guido van Rossum | 4117e54 | 1998-09-14 16:44:15 +0000 | [diff] [blame] | 103 |         return self._function_info[name] | 
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 104 |  | 
 | 105 |  | 
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module.

    The module docstring is looked for in the first child of the tree
    only (tree[1]).
    """

    def __init__(self, tree = None, name = "<string>"):
        """Scan the module parse tree `tree' and record `name'.

        tree
            Parse tree (nested tuples) of the module; a false value
            leaves the object empty.

        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Store the value of the string literal, as is done for
                # classes and functions, not its raw source text with the
                # quotes still attached.
                # NOTE(review): eval() is applied to text captured from a
                # token.STRING node, presumably always a string literal.
                self._docstring = eval(vars["docstring"])
            else:
                # The base class probes tree[3], which for a module is not
                # the first statement; discard whatever it picked up.
                self._docstring = ''
| Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 114 |  | 
 | 115 |  | 
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches data of the same length whose elements match
    the corresponding elements of the pattern; an empty tuple pattern
    matches empty data.  Any other pattern value matches data it compares
    equal to.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  When no match is found, the dictionary may still contain the
    variables that were bound before the mismatch was detected.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    if len(data) != len(pattern):
        return 0, vars
    # Start from "matched" so that an empty pattern against empty data is
    # a match; this also keeps `same' defined when the loop never runs.
    same = 1
    # The lengths are known to be equal here, so zip() pairs every element.
    for subpattern, subdata in zip(pattern, data):
        same, vars = match(subpattern, subdata, vars)
        if not same:
            break
    return same, vars
 | 153 |  | 
 | 154 |  | 
#  This pattern identifies compound statements, allowing them to be readily
#  differentiated from simple statements.  When match() succeeds with this
#  pattern, the node nested inside the compound_stmt node is bound to the
#  variable 'compound'.
#
COMPOUND_STMT_PATTERN = (
    symbol.stmt,
    (symbol.compound_stmt, ['compound'])
    )
 | 162 |  | 
 | 163 |  | 
#  This pattern will match a 'stmt' node which *might* represent a docstring;
#  docstrings require that the statement which provides the docstring be the
#  first statement in the class or function, which this pattern does not check.
#
#  When match() succeeds with this pattern, the text of the STRING token is
#  bound to the variable 'docstring'.  Element [1] of the pattern (the
#  simple_stmt part) can be used on its own to match a simple_stmt node
#  directly.  The simple_stmt part has exactly three elements, so only a
#  simple_stmt made of one small_stmt followed by a NEWLINE token with
#  empty text can match.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))