"""Simple code to extract class & function docstrings from a module.

This code is used as an example in the library reference manual in the
section on using the parser module.  Refer to the manual for a thorough
discussion of the operation of this code.
"""
| 7 | |
| 8 | import symbol |
| 9 | import token |
| 10 | import types |
| 11 | |
| 12 | |
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the parse tree; the
    module name is the file's base name with its extension removed.
    """
    # Imported locally (as in the original) so that merely importing this
    # module does not require the `parser' module to be available.
    import os
    import parser

    # Use a context manager so the file is closed even if read() fails;
    # the original left the handle for the garbage collector to close.
    with open(fileName) as source_file:
        source = source_file.read()
    basename = os.path.basename(os.path.splitext(fileName)[0])
    syntax_tree = parser.suite(source)
    tup = parser.ast2tuple(syntax_tree)
    return ModuleInfo(tup, basename)
| 26 | |
| 27 | |
class SuiteInfoBase:
    """Common base for objects describing a suite taken from a parse tree.

    Holds the docstring found for the suite plus two dictionaries mapping
    the names of the classes and functions defined directly inside it to
    ClassInfo / FunctionInfo objects.
    """

    # Class-level defaults, used when no docstring / name was recorded.
    _docstring = ''
    _name = ''

    def __init__(self, tree=None):
        """Initialize the info object, scanning `tree' when one is given.

        tree
            Parse tree (nested tuples) of the suite, or None / empty to
            create an info object with no recorded definitions.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Record the docstring and the inner class/function definitions."""
        # extract docstring
        if len(tree) == 2:
            # Two-element tree: the only child is matched against the
            # simple_stmt part of the docstring pattern (presumably a
            # one-line suite -- confirm against the grammar).
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # Longer tree: element 3 is taken as the first statement
            # (presumably after NEWLINE and INDENT tokens -- confirm).
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # A three-element tree has no element 3; the original raised
            # IndexError here.  Treat it as "no docstring" instead.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() turns the STRING token text into the
            # string value.  It runs on text taken from the parsed source;
            # do not feed this untrusted input without reviewing this call.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = bindings['compound']
                # Element [2][1] of a funcdef/classdef node is used as the
                # name of the definition.
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the recorded docstring ('' if none was found)."""
        return self._docstring

    def get_name(self):
        """Return the recorded name ('' if none was set)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes recorded for this suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object recorded for class `name'."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        A KeyError from the function lookup propagates when `name' is in
        neither dictionary.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 75 | |
| 76 | |
class SuiteFuncInfo:
    """Mixin class providing access to function names and info.

    The methods read the `_function_info' attribute of the instance, so
    the class this is mixed into must provide that mapping.
    """

    def get_function_names(self):
        """Return the keys of the `_function_info' mapping."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the entry stored under `name' in `_function_info'."""
        functions = self._function_info
        return functions[name]
| 85 | |
| 86 | |
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about one function definition taken from a parse tree."""

    def __init__(self, tree=None):
        """Record the function's name and scan its body.

        tree
            The funcdef node of the parse tree.  Element [2][1] is used as
            the function name and the last element as the body suite.  If
            omitted or empty, the name keeps its default ('') and nothing
            is scanned; the original raised TypeError for the default None.
        """
        if tree:
            self._name = tree[2][1]
        SuiteInfoBase.__init__(self, tree[-1] if tree else None)
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 91 | |
| 92 | |
class ClassInfo(SuiteInfoBase):
    """Information about one class definition taken from a parse tree."""

    def __init__(self, tree=None):
        """Record the class's name and scan its body.

        tree
            The classdef node of the parse tree.  Element [2][1] is used
            as the class name and the last element as the body suite.  If
            omitted or empty, the name keeps its default ('') and nothing
            is scanned; the original raised TypeError for the default None.
        """
        if tree:
            self._name = tree[2][1]
        SuiteInfoBase.__init__(self, tree[-1] if tree else None)

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object recorded for method `name'."""
        return self._function_info[name]
| 103 | |
| 104 | |
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module taken from its parse tree."""

    def __init__(self, tree=None, name="<string>"):
        """Record the module name, its definitions and its docstring.

        tree
            Parse tree (nested tuples) of the module, or None / empty.

        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree:
            # The base initializer probes tree[3] for a docstring, which
            # for a module is not the first statement.  Decide the module
            # docstring from tree[1] alone and discard whatever the base
            # class recorded.
            self._docstring = ''
            if len(tree) > 1:
                found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
                if found:
                    # Evaluate the STRING token so the module docstring is
                    # the string value, as is done for class and function
                    # docstrings (the original stored the raw, still-quoted
                    # token text).
                    # NOTE(review): eval() runs on text taken from the
                    # parsed source; review before using on untrusted input.
                    self._docstring = eval(bindings["docstring"])
Guido van Rossum | 16d27e3 | 1996-08-21 16:28:53 +0000 | [diff] [blame] | 113 | |
| 114 | |
| 115 | from types import ListType, TupleType |
| 116 | |
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  A dictionary passed in
        is updated in place.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.  Variables bound before a mismatch was detected remain in the
    dictionary even when the match fails.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return True, vars
    if not isinstance(pattern, tuple):
        # Leaf value: plain equality decides the match.
        return (pattern == data), vars
    # A tuple pattern can only match a sequence of the same length; data
    # that is not a tuple or list (e.g. an int) is simply not a match
    # rather than an error from len().
    if not isinstance(data, (tuple, list)) or len(data) != len(pattern):
        return False, vars
    # Lengths are equal here, so zip() pairs every element.  An empty
    # tuple pattern matches empty data (the loop body never runs).
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            return False, vars
    return True, vars
| 154 | |
| 155 | |
# Pattern for a 'stmt' node whose single child is a 'compound_stmt'.  The
# child of the compound statement is captured under the variable name
# 'compound', which lets compound statements be told apart from simple ones.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
| 163 | |
| 164 | |
# Pattern for a 'stmt' node which *might* be a docstring: a simple statement
# consisting of nothing but one STRING token, captured under the variable
# name 'docstring'.  A docstring must also be the first statement of its
# class or function; this pattern does not check that.
#
# The single-child chain from 'small_stmt' down to 'atom' is built from the
# innermost node outwards, wrapping one grammar symbol per step.
#
_inner = (token.STRING, ['docstring'])
for _sym in (
        symbol.atom,
        symbol.power,
        symbol.factor,
        symbol.term,
        symbol.arith_expr,
        symbol.shift_expr,
        symbol.and_expr,
        symbol.xor_expr,
        symbol.expr,
        symbol.comparison,
        symbol.not_test,
        symbol.and_test,
        symbol.test,
        symbol.testlist,
        symbol.expr_stmt,
        symbol.small_stmt,
        ):
    _inner = (_sym, _inner)

DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (symbol.simple_stmt, _inner, (token.NEWLINE, '')),
)

# Drop the temporaries so the module namespace holds only the pattern.
del _inner, _sym
| 192 | |
| 193 | # |
| 194 | # end of file |