blob: 821cef0cde316cef5e9bf8c0b9bb09893c6453a6 [file] [log] [blame]
"""Simple code to extract class & function docstrings from a module.

This code is used as an example in the library reference manual in the
section on using the parser module.  Refer to the manual for a thorough
discussion of the operation of this code.
"""
7
8import symbol
9import token
10import types
11
12
def get_docs(fileName):
    """Retrieve information from the parse tree of a source file.

    fileName
        Name of the file to read Python source code from.

    Returns a ModuleInfo built from the tuple form of the file's parse
    tree and named after the file's base name with the extension removed.
    """
    # Close the file explicitly rather than leaving the handle open until
    # it happens to be garbage collected.  try/finally is used (instead of
    # a `with' statement) to stay at the syntax level of the rest of the
    # file.
    source_file = open(fileName)
    try:
        source = source_file.read()
    finally:
        source_file.close()
    import os
    basename = os.path.basename(os.path.splitext(fileName)[0])
    import parser
    ast = parser.suite(source)
    tup = parser.ast2tuple(ast)
    return ModuleInfo(tup, basename)
26
27
class SuiteInfoBase:
    """Base class holding what was discovered about one suite of code.

    An instance records the suite's docstring and, keyed by name, the
    info objects for the classes and functions defined directly in it.
    """

    # Class-level defaults, used when no docstring / name is ever assigned.
    _docstring = ''
    _name = ''

    def __init__(self, tree=None):
        """Create empty tables and, when `tree' is non-empty, scan it.

        tree
            Parse tree (nested sequences) of the suite, or None.
        """
        self._class_info = {}
        self._function_info = {}
        if tree:
            self._extract_info(tree)

    def _extract_info(self, tree):
        """Record the docstring and the inner definitions found in `tree'."""
        # extract docstring
        if len(tree) == 2:
            # The tree has a single child; compare it with the
            # simple_stmt part of the docstring pattern.
            found, bindings = match(DOCSTRING_STMT_PATTERN[1], tree[1])
        elif len(tree) > 3:
            # presumably tree[3] is the first statement after the suite's
            # NEWLINE and INDENT nodes -- TODO confirm against the grammar
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[3])
        else:
            # Too short to have an element at index 3 (the original code
            # raised IndexError here); there is nothing to inspect.
            found, bindings = 0, {}
        if found:
            # NOTE(review): eval() turns the STRING token's source text
            # into its value.  It evaluates text taken from the parsed
            # file, so only use this on source you trust.
            self._docstring = eval(bindings['docstring'])
        # discover inner definitions
        for node in tree[1:]:
            found, bindings = match(COMPOUND_STMT_PATTERN, node)
            if found:
                cstmt = bindings['compound']
                if cstmt[0] == symbol.funcdef:
                    name = cstmt[2][1]
                    self._function_info[name] = FunctionInfo(cstmt)
                elif cstmt[0] == symbol.classdef:
                    name = cstmt[2][1]
                    self._class_info[name] = ClassInfo(cstmt)

    def get_docstring(self):
        """Return the recorded docstring ('' if none was found)."""
        return self._docstring

    def get_name(self):
        """Return the recorded name ('' if none was set)."""
        return self._name

    def get_class_names(self):
        """Return the names of the classes found in this suite."""
        return self._class_info.keys()

    def get_class_info(self, name):
        """Return the info object for class `name'; KeyError if unknown."""
        return self._class_info[name]

    def __getitem__(self, name):
        """Look `name' up among the classes first, then the functions.

        Raises KeyError when `name' is in neither table.
        """
        try:
            return self._class_info[name]
        except KeyError:
            return self._function_info[name]
Guido van Rossum16d27e31996-08-21 16:28:53 +000075
76
class SuiteFuncInfo:
    """Mixin class providing access to function names and info.

    Relies on the host class supplying a `_function_info' mapping.
    """

    def get_function_names(self):
        """Return the names of the recorded functions."""
        functions = self._function_info
        return functions.keys()

    def get_function_info(self, name):
        """Return the info object for function `name'; KeyError if unknown."""
        functions = self._function_info
        return functions[name]
Guido van Rossum16d27e31996-08-21 16:28:53 +000085
86
class FunctionInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a single function definition."""

    def __init__(self, tree=None):
        """Record the function's name and scan its body.

        tree
            Parse tree of the definition, or None.  The name is read
            from tree[2][1] and the last element is handed to
            SuiteInfoBase as the body to scan.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            # The default tree=None used to fail on tree[2][1]; with no
            # tree the class-level default name is kept and nothing is
            # scanned.
            body = None
        SuiteInfoBase.__init__(self, body)
Guido van Rossum16d27e31996-08-21 16:28:53 +000091
92
class ClassInfo(SuiteInfoBase):
    """Information about a single class definition."""

    def __init__(self, tree=None):
        """Record the class's name and scan its body.

        tree
            Parse tree of the definition, or None.  The name is read
            from tree[2][1] and the last element is handed to
            SuiteInfoBase as the body to scan.
        """
        if tree:
            self._name = tree[2][1]
            body = tree[-1]
        else:
            # The default tree=None used to fail on tree[2][1]; with no
            # tree the class-level default name is kept and nothing is
            # scanned.
            body = None
        SuiteInfoBase.__init__(self, body)

    def get_method_names(self):
        """Return the names of the functions defined in the class body."""
        return self._function_info.keys()

    def get_method_info(self, name):
        """Return the info object for method `name'; KeyError if unknown."""
        return self._function_info[name]
Guido van Rossum16d27e31996-08-21 16:28:53 +0000103
104
class ModuleInfo(SuiteInfoBase, SuiteFuncInfo):
    """Information about a whole module."""

    def __init__(self, tree=None, name="<string>"):
        """Record the module name, scan `tree', and pick up the docstring.

        tree
            Parse tree of the module, or None.

        name
            Name to report for the module.
        """
        self._name = name
        SuiteInfoBase.__init__(self, tree)
        if tree and len(tree) > 1:
            # For a module the candidate docstring statement is tree[1],
            # so it is checked here in addition to the base class scan.
            found, bindings = match(DOCSTRING_STMT_PATTERN, tree[1])
            if found:
                # Evaluate the STRING token the same way SuiteInfoBase
                # does, so get_docstring() yields the string's value and
                # not its quoted source text.
                # NOTE(review): eval() runs on text taken from the parsed
                # file; only use this on source you trust.
                self._docstring = eval(bindings["docstring"])
Guido van Rossum16d27e31996-08-21 16:28:53 +0000113
114
115from types import ListType, TupleType
116
def match(pattern, data, vars=None):
    """Match `data' to `pattern', with variable extraction.

    pattern
        Pattern to match against, possibly containing variables.

    data
        Data to be checked and against which variables are extracted.

    vars
        Dictionary of variables which have already been found.  If not
        provided, an empty dictionary is created.  A dictionary that is
        passed in is updated in place.

    The `pattern' value may contain variables of the form ['varname'] which
    are allowed to match anything.  The value that is matched is returned as
    part of a dictionary which maps 'varname' to the matched value.  'varname'
    is not required to be a string object, but using strings makes patterns
    and the code which uses them more readable.

    A tuple pattern matches only data of the same length whose elements
    match pairwise; data that has no length never matches a tuple pattern.
    Any other pattern value matches data that compares equal to it.

    This function returns two values: a boolean indicating whether a match
    was found and a dictionary mapping variable names to their associated
    values.
    """
    if vars is None:
        vars = {}
    if isinstance(pattern, list):       # 'variables' are ['varname']
        vars[pattern[0]] = data
        return 1, vars
    if not isinstance(pattern, tuple):
        return (pattern == data), vars
    try:
        data_length = len(data)
    except TypeError:
        # Unsized data (e.g. an integer leaf) cannot match a tuple pattern.
        return 0, vars
    if data_length != len(pattern):
        return 0, vars
    # Start out true so that an empty pattern matches empty data instead of
    # leaving the result unbound when the loop body never runs.
    same = 1
    for sub_pattern, sub_data in zip(pattern, data):
        same, vars = match(sub_pattern, sub_data, vars)
        if not same:
            break
    return same, vars
154
155
# Pattern for a 'stmt' node that wraps a compound statement, which makes
# compound statements easy to tell apart from simple ones.  The child of
# the 'compound_stmt' node is captured under the variable name 'compound'.
#
COMPOUND_STMT_PATTERN = (symbol.stmt, (symbol.compound_stmt, ['compound']))
163
164
# Pattern for a 'stmt' node that *could* be a docstring: a simple statement
# made up of nothing but a single STRING token followed by a NEWLINE.  The
# text of the string token is captured under the variable name 'docstring'.
# A real docstring must also be the first statement of its class or
# function; this pattern does not check that.
#
DOCSTRING_STMT_PATTERN = (
    symbol.stmt,
    (
        symbol.simple_stmt,
        (symbol.small_stmt,
         (symbol.expr_stmt,
          (symbol.testlist,
           (symbol.test,
            (symbol.and_test,
             (symbol.not_test,
              (symbol.comparison,
               (symbol.expr,
                (symbol.xor_expr,
                 (symbol.and_expr,
                  (symbol.shift_expr,
                   (symbol.arith_expr,
                    (symbol.term,
                     (symbol.factor,
                      (symbol.power,
                       (symbol.atom,
                        (token.STRING, ['docstring'])
                        )))))))))))))))),
        (token.NEWLINE, ''),
    ),
)
192
193#
194# end of file