blob: 709a07bf3661cf3a2fdaf6c3e9993d662b28d05a [file] [log] [blame]
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00001'''Parse a Python file and retrieve classes and methods.
2
3Parse enough of a Python file to recognize class and method
4definitions and to find out the superclasses of a class.
5
6The interface consists of a single function:
7 readmodule(module, path)
8module is the name of a Python module, path is an optional list of
9directories where the module is to be searched. If present, path is
10prepended to the system search path sys.path.
11The return value is a dictionary. The keys of the dictionary are
12the names of the classes defined in the module (including classes
13that are defined via the from XXX import YYY construct). The values
14are class instances of the class Class defined here.
15
A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the name of the module in which the class was defined
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
23The dictionary of methods uses the method names as keys and the line
24numbers on which the method was defined as values.
25If the name of a super class is not recognized, the corresponding
26entry in the list of super classes is not a class instance but a
27string giving the name of the super class. Since import statements
28are recognized and imported modules are scanned as well, this
29shouldn't happen often.
30
31BUGS
Guido van Rossumdf9f7a31999-06-08 12:53:21 +000032Continuation lines are not dealt with at all.
33While triple-quoted strings won't confuse it, lines that look like
34def, class, import or "from ... import" stmts inside backslash-continued
35single-quoted strings are treated like code. The expense of stopping
36that isn't worth it.
Code that doesn't pass tabnanny or python -t will confuse it, unless
you set the module TABWIDTH variable (default 8) to the correct tab width
39for the file.''' # ' <-- bow to font lock
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000040
41import os
42import sys
43import imp
Guido van Rossum31626bc1997-10-24 14:46:16 +000044import re
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000045import string
46
# Tab width used by _indent() when expanding tabs in the leading
# whitespace of "def" and "class" lines.
TABWIDTH = 8

# _getnext(src, pos) is the bound search() method of one compiled
# pattern; it returns the next match in SRC at or after POS, or None.
# Exactly one of the following top-level named groups takes part in
# any match, which is how readmodule() tells the cases apart:
#   String     -- a complete triple-quoted string (either quote style);
#                 matching it as a unit keeps its contents from being
#                 scanned as code
#   Method     -- the start of a "def" statement, with the sub-groups
#                 MethodIndent (leading blanks/tabs) and MethodName
#   Class      -- a "class" statement up to its colon, with the
#                 sub-groups ClassIndent, ClassName and the optional
#                 ClassSupers (the parenthesized base list, which must
#                 close on the same line)
#   Import     -- "import ..." starting in the first column; ImportList
#                 is the rest of the line up to a '#' or ';'
#   ImportFrom -- "from ... import ..." starting in the first column,
#                 with the sub-groups ImportFromPath (dotted name) and
#                 ImportFromList
# re.MULTILINE makes "^" match at the start of every line; re.VERBOSE
# means blanks in the pattern are ignored outside character classes.
_getnext = re.compile(r"""
    (?P<String>
       \""" [^"\\]* (?:
                        (?: \\. | "(?!"") )
                        [^"\\]*
                    )*
       \"""

    |   ''' [^'\\]* (?:
                        (?: \\. | '(?!'') )
                        [^'\\]*
                    )*
        '''
    )

|   (?P<Method>
        ^
        (?P<MethodIndent> [ \t]* )
        def [ \t]+
        (?P<MethodName> [a-zA-Z_] \w* )
        [ \t]* \(
    )

|   (?P<Class>
        ^
        (?P<ClassIndent> [ \t]* )
        class [ \t]+
        (?P<ClassName> [a-zA-Z_] \w* )
        [ \t]*
        (?P<ClassSupers> \( [^)\n]* \) )?
        [ \t]* :
    )

|   (?P<Import>
        ^ import [ \t]+
        (?P<ImportList> [^#;\n]+ )
    )

|   (?P<ImportFrom>
        ^ from [ \t]+
        (?P<ImportFromPath>
            [a-zA-Z_] \w*
            (?:
                [ \t]* \. [ \t]* [a-zA-Z_] \w*
            )*
        )
        [ \t]+
        import [ \t]+
        (?P<ImportFromList> [^#;\n]+ )
    )
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search

# Maps a module name to the dictionary readmodule() returned for it.
_modules = {}                           # cache of modules we've seen
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000102
# every class found by the scanner is recorded as one Class instance
class Class:
    '''Record describing a single class statement found in a module.

    Instance variables:
        module  -- name of the module the class was found in
        name    -- name of the class
        super   -- list of super classes; [] when None was passed
        methods -- dictionary mapping method name to line number
        file    -- file in which the class was defined
        lineno  -- line on which the class statement occurred
    '''

    def __init__(self, module, name, super, file, lineno):
        # A missing superclass list becomes a fresh, unshared list.
        if super is None:
            super = []
        self.module, self.name = module, name
        self.super, self.methods = super, {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Remember that method NAME is defined on line LINENO.

        A later definition of the same name replaces the earlier one.
        '''
        self.methods[name] = lineno
118
Guido van Rossum7a840e81998-10-12 15:21:38 +0000119def readmodule(module, path=[], inpackage=0):
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000120 '''Read a module file and return a dictionary of classes.
121
122 Search for MODULE in PATH and sys.path, read and parse the
123 module and return a dictionary with one entry for each class
124 found in the module.'''
125
Guido van Rossum7a840e81998-10-12 15:21:38 +0000126 i = string.rfind(module, '.')
127 if i >= 0:
128 # Dotted module name
Guido van Rossum06884361998-10-12 15:23:04 +0000129 package = string.strip(module[:i])
130 submodule = string.strip(module[i+1:])
Guido van Rossum7a840e81998-10-12 15:21:38 +0000131 parent = readmodule(package, path, inpackage)
132 child = readmodule(submodule, parent['__path__'], 1)
133 return child
134
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000135 if _modules.has_key(module):
136 # we've seen this module before...
137 return _modules[module]
138 if module in sys.builtin_module_names:
139 # this is a built-in module
140 dict = {}
141 _modules[module] = dict
142 return dict
143
144 # search the path for the module
145 f = None
Guido van Rossum7a840e81998-10-12 15:21:38 +0000146 if inpackage:
147 try:
148 f, file, (suff, mode, type) = \
149 imp.find_module(module, path)
150 except ImportError:
151 f = None
152 if f is None:
Fred Drake3d199af1999-02-18 20:51:50 +0000153 fullpath = list(path) + sys.path
Guido van Rossum7a840e81998-10-12 15:21:38 +0000154 f, file, (suff, mode, type) = imp.find_module(module, fullpath)
155 if type == imp.PKG_DIRECTORY:
156 dict = {'__path__': [file]}
157 _modules[module] = dict
158 # XXX Should we recursively look for submodules?
159 return dict
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000160 if type != imp.PY_SOURCE:
161 # not Python source, can't do anything with this module
162 f.close()
163 dict = {}
164 _modules[module] = dict
165 return dict
166
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000167 dict = {}
168 _modules[module] = dict
169 imports = []
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000170 classstack = [] # stack of (class, indent) pairs
Guido van Rossumad380551999-06-07 15:25:18 +0000171 src = f.read()
172 f.close()
173
174 # To avoid having to stop the regexp at each newline, instead
175 # when we need a line number we simply string.count the number of
176 # newlines in the string since the last time we did this; i.e.,
177 # lineno = lineno + \
178 # string.count(src, '\n', last_lineno_pos, here)
179 # last_lineno_pos = here
180 countnl = string.count
181 lineno, last_lineno_pos = 1, 0
182 i = 0
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000183 while 1:
Guido van Rossumad380551999-06-07 15:25:18 +0000184 m = _getnext(src, i)
185 if not m:
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000186 break
Guido van Rossumad380551999-06-07 15:25:18 +0000187 start, i = m.span()
188
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000189 if m.start("Method") >= 0:
190 # found a method definition or function
191 thisindent = _indent(m.group("MethodIndent"))
192 # close all classes indented at least as much
193 while classstack and \
194 classstack[-1][1] >= thisindent:
195 del classstack[-1]
196 if classstack:
Guido van Rossumad380551999-06-07 15:25:18 +0000197 # and we know the class it belongs to
198 meth_name = m.group("MethodName")
199 lineno = lineno + \
200 countnl(src, '\n',
201 last_lineno_pos, start)
202 last_lineno_pos = start
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000203 cur_class = classstack[-1][0]
Guido van Rossumad380551999-06-07 15:25:18 +0000204 cur_class._addmethod(meth_name, lineno)
205
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000206 elif m.start("String") >= 0:
207 pass
208
Guido van Rossumad380551999-06-07 15:25:18 +0000209 elif m.start("Class") >= 0:
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000210 # we found a class definition
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000211 thisindent = _indent(m.group("ClassIndent"))
212 # close all classes indented at least as much
213 while classstack and \
214 classstack[-1][1] >= thisindent:
215 del classstack[-1]
Guido van Rossumad380551999-06-07 15:25:18 +0000216 lineno = lineno + \
217 countnl(src, '\n', last_lineno_pos, start)
218 last_lineno_pos = start
219 class_name = m.group("ClassName")
220 inherit = m.group("ClassSupers")
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000221 if inherit:
222 # the class inherits from other classes
223 inherit = string.strip(inherit[1:-1])
224 names = []
225 for n in string.splitfields(inherit, ','):
226 n = string.strip(n)
227 if dict.has_key(n):
228 # we know this super class
229 n = dict[n]
230 else:
231 c = string.splitfields(n, '.')
232 if len(c) > 1:
233 # super class
234 # is of the
235 # form module.class:
236 # look in
237 # module for class
238 m = c[-2]
239 c = c[-1]
240 if _modules.has_key(m):
241 d = _modules[m]
242 if d.has_key(c):
243 n = d[c]
244 names.append(n)
245 inherit = names
246 # remember this class
Guido van Rossumad380551999-06-07 15:25:18 +0000247 cur_class = Class(module, class_name, inherit,
248 file, lineno)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000249 dict[class_name] = cur_class
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000250 classstack.append((cur_class, thisindent))
Guido van Rossum31626bc1997-10-24 14:46:16 +0000251
Guido van Rossumad380551999-06-07 15:25:18 +0000252 elif m.start("Import") >= 0:
253 # import module
254 for n in string.split(m.group("ImportList"), ','):
255 n = string.strip(n)
256 try:
257 # recursively read the imported module
258 d = readmodule(n, path, inpackage)
259 except:
260 print 'module', n, 'not found'
261
262 elif m.start("ImportFrom") >= 0:
263 # from module import stuff
264 mod = m.group("ImportFromPath")
265 names = string.split(m.group("ImportFromList"), ',')
266 try:
267 # recursively read the imported module
268 d = readmodule(mod, path, inpackage)
269 except:
270 print 'module', mod, 'not found'
271 continue
272 # add any classes that were defined in the
273 # imported module to our name space if they
274 # were mentioned in the list
275 for n in names:
276 n = string.strip(n)
277 if d.has_key(n):
278 dict[n] = d[n]
279 elif n == '*':
280 # only add a name if not
281 # already there (to mimic what
282 # Python does internally)
283 # also don't add names that
284 # start with _
285 for n in d.keys():
286 if n[0] != '_' and \
287 not dict.has_key(n):
288 dict[n] = d[n]
289 else:
290 assert 0, "regexp _getnext found something unexpected"
291
292 return dict
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000293
def _indent(ws, _expandtabs=string.expandtabs):
    '''Return the width of the whitespace string WS in columns.

    Tabs are expanded using the module-level TABWIDTH.  The
    _expandtabs default argument only binds string.expandtabs to a
    local name.
    '''
    expanded = _expandtabs(ws, TABWIDTH)
    return len(expanded)