Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 1 | """Simple code to extract class & function docstrings from a module. |
| 2 | |
Guido van Rossum | 8206fb9 | 1996-08-26 00:33:29 +0000 | [diff] [blame] | 3 | This code is used as an example in the library reference manual in the |
| 4 | section on using the parser module. Refer to the manual for a thorough |
| 5 | discussion of the operation of this code. |
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 6 | """ |
| 7 | |
Fred Drake | 995285e | 1999-11-19 21:57:56 +0000 | [diff] [blame] | 8 | import os |
| 9 | import parser |
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 10 | import symbol |
| 11 | import token |
| 12 | import types |
| 13 | |
Fred Drake | 995285e | 1999-11-19 21:57:56 +0000 | [diff] [blame] | 14 | from types import ListType, TupleType |
| 15 | |
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 16 | |
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo object built from the tuple form of the parse
    tree; its name is the base name of `fileName' with the extension
    removed.
    """
    # Close the file explicitly instead of relying on garbage collection
    # to release the handle.
    f = open(fileName)
    try:
        source = f.read()
    finally:
        f.close()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    ast = parser.suite(source)
    return ModuleInfo(ast.totuple(), basename)
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 27 | |
| 28 | |
class SuiteInfoBase:
    """Base class recording what is defined inside one parse-tree node.

    An instance keeps a docstring, a name, and two dictionaries mapping
    the names of directly nested classes and functions to ClassInfo and
    FunctionInfo objects.
    """
    # Class-level defaults, used when no docstring or name is found.
    _docstring = ''
    _name = ''

    def __init__(self, tree = None):
        """Initialize empty tables, then scan `tree' if one is given."""
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Fill in the docstring and the nested definitions from `tree'."""
        # extract docstring
        if len(tree) == 2:
            # Only one child: compare it with the simple_stmt part of the
            # docstring pattern.
            found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Otherwise the fourth element is checked (presumably the first
            # statement after the NEWLINE and INDENT tokens of an indented
            # suite -- TODO confirm against the grammar).
            found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # A three-element tree has no tree[3]; treat it as having no
            # docstring instead of raising IndexError.
            found, vars = 0, {}
        if found:
            # The captured value is the text of a STRING token; eval()
            # converts it to the string value.
            # NOTE(review): this evaluates text taken from the parsed
            # source; reconsider if untrusted files are processed.
            self._docstring = eval(vars['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, vars = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = vars['compound']
                # cstmt[0] is the node type; cstmt[2][1] is used as the
                # name of the definition.
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the stored docstring ('' if none was found)."""
        return self._docstring

    def get_name(self):
        """Return the stored name."""
        return self._name

    def get_class_names(self):
        """Return the names of the directly nested classes."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the ClassInfo for `name'; raises KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        Raises KeyError if `name' is in neither table.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 76 | |
| 77 | |
class SuiteFuncInfo:
    """Mixin class providing access to function names and info.

    It reads the `_function_info' dictionary of the instance it is
    mixed into.
    """

    def get_function_names(self):
        """Return the names of the recorded functions."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info recorded for `name'; raises KeyError if unknown."""
        functions = self._function_info
        return functions[name]
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 86 | |
| 87 | |
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition.

    tree
        The definition's parse-tree node in tuple form.  Its third
        element supplies the name and its last element is the body that
        is scanned for a docstring and nested definitions.  If omitted,
        the object is left empty.
    """
    def __init__(self, tree = None):
        suite = None
        if tree:
            # Only index into the node when one was supplied; the default
            # of None used to raise TypeError here.
            self._name = tree[2][1]
            suite = tree[-1] or None
        SuiteInfoBase.__init__(self, suite)
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 92 | |
| 93 | |
class ClassInfo(SuiteInfoBase):
    """Information about one class definition.

    tree
        The definition's parse-tree node in tuple form.  Its third
        element supplies the name and its last element is the body that
        is scanned for a docstring and nested definitions.  If omitted,
        the object is left empty.
    """
    def __init__(self, tree = None):
        suite = None
        if tree:
            # Only index into the node when one was supplied; the default
            # of None used to raise TypeError here.
            self._name = tree[2][1]
            suite = tree[-1] or None
        SuiteInfoBase.__init__(self, suite)

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the FunctionInfo for method `name'; KeyError if unknown."""
        return self._function_info[name]
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 104 | |
| 105 | |
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module.

    tree
        Parse tree of the module in tuple form, or None for an empty
        object.

    name
        Name reported by get_name(); defaults to "<string>".
    """
    def __init__(self, tree = None, name = "<string>"):
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if not tree:
            return
        # The module docstring is looked for in the first child of the tree.
        matched, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
        if matched:
            # NOTE(review): the captured token text is stored as is, while
            # SuiteInfoBase stores eval() of it -- confirm which is intended.
            self._docstring = bindings["docstring"]
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 114 | |
| 115 | |
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  The dictionary is
        updated in place, so it may hold bindings made before a mismatch
        was detected.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches only data of the same length whose elements
    match pairwise; any other pattern matches data that compares equal.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    try:
        size = len(data)
    except TypeError:
        # Unsized data (e.g. a number) can never match a tuple pattern.
        return 0, vars
    if size != len(pattern):
        return 0, vars
    # Start out matched so that an empty pattern matches empty data.
    same = 1
    for i in range(size):
        same, vars = match(pattern[i], data[i], vars)
        if not same:
            break
    return same, vars
| 153 | |
| 154 | |
# This pattern identifies compound statements, allowing them to be readily
# differentiated from simple statements.  It matches a two-element `stmt'
# node whose child is a two-element `compound_stmt' node; the child of that
# node is captured under the variable name 'compound'.
#
COMPOUND_STMT_PATTERN = (
    symbol.stmt,
    (symbol.compound_stmt, ['compound'])
    )
| 162 | |
| 163 | |
# This pattern will match a 'stmt' node which *might* represent a docstring;
# docstrings require that the statement which provides the docstring be the
# first statement in the class or function, which this pattern does not check.
#
# The statement must consist of a single STRING token reached through one
# node at every level from `simple_stmt' down to `atom', followed by a
# NEWLINE token whose text is empty.  The text of the STRING token is
# captured under the variable name 'docstring'.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt,
     (symbol.small_stmt,
      (symbol.expr_stmt,
       (symbol.testlist,
        (symbol.test,
         (symbol.and_test,
          (symbol.not_test,
           (symbol.comparison,
            (symbol.expr,
             (symbol.xor_expr,
              (symbol.and_expr,
               (symbol.shift_expr,
                (symbol.arith_expr,
                 (symbol.term,
                  (symbol.factor,
                   (symbol.power,
                    (symbol.atom,
                     (token.STRING, ['docstring'])
                     )))))))))))))))),
     (token.NEWLINE, '')
     ))