blob: 26cc0ce0dade507065f6e232c8c6c187bda30703 [file] [log] [blame]
Guido van Rossum0a6f9542002-12-03 08:14:35 +00001"""Parse a Python module and describe its classes and methods.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00002
Guido van Rossum0a6f9542002-12-03 08:14:35 +00003Parse enough of a Python file to recognize imports and class and
4method definitions, and to find out the superclasses of a class.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00005
6The interface consists of a single function:
Guido van Rossum0a6f9542002-12-03 08:14:35 +00007 readmodule_ex(module [, path])
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
10path is prepended to the system search path sys.path. The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
those that are brought in via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair
15is present for packages: the key '__path__' has a list as its value
16which contains the package search path.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000017
18A class is described by the class Class in this module. Instances
19of this class have the following instance variables:
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000020 module -- the module name
Tim Peters2344fae2001-01-15 00:50:52 +000021 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000026The dictionary of methods uses the method names as keys and the line
27numbers on which the method was defined as values.
28If the name of a super class is not recognized, the corresponding
29entry in the list of super classes is not a class instance but a
30string giving the name of the super class. Since import statements
31are recognized and imported modules are scanned as well, this
32shouldn't happen often.
33
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000034A function is described by the class Function in this module.
35Instances of this class have the following instance variables:
36 module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000040"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000041
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000042import sys
43import imp
Guido van Rossum040d7ca2002-08-23 01:36:01 +000044import tokenize # Python tokenizer
Guido van Rossum0a6f9542002-12-03 08:14:35 +000045from token import NAME, DEDENT, NEWLINE
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000046
# Names exported by "from <this module> import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a full (dotted) module name to the dictionary built for it by
# _readmodule().  The entry is stored before the module's source is
# scanned, so a module that is imported again while it is still being
# parsed gets the partially filled dictionary back instead of recursing.
_modules = {} # cache of modules we've seen
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000050
51# each Python class is represented by an instance of this class
class Class:
    '''Describe one Python class found while scanning a module.

    Instance variables: module (the module name), name (the class name),
    super (the list of superclasses, empty when none was given), methods
    (dictionary mapping method name to line number), file and lineno
    (where the class statement was found).
    '''

    def __init__(self, module, name, super, file, lineno):
        # A missing superclass list is normalised to a fresh empty list;
        # a list supplied by the caller is stored as-is (not copied).
        if super is None:
            bases = []
        else:
            bases = super
        self.module, self.name = module, name
        self.super = bases
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        # Record (or overwrite) the line on which method `name` is defined.
        self.methods.update({name: lineno})
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000066
class Function:
    '''Describe one top-level Python function found while scanning.

    Instance variables: module (the module name), name (the function
    name), file and lineno (where the function was found).
    '''

    def __init__(self, module, name, file, lineno):
        # Store the four constructor arguments under the same names.
        for attr, value in (("module", module), ("name", name),
                            ("file", file), ("lineno", lineno)):
            setattr(self, attr, value)
Guido van Rossuma3b4a331999-06-10 14:39:39 +000074
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Parse MODULE the same way readmodule_ex() does (both delegate to
    _readmodule()) and then only keep the Class objects from the
    resulting dictionary, dropping Function objects and the '__path__'
    entry of packages.

    MODULE is the module name; PATH is an optional list of directories
    that is searched before sys.path (None, the default, means no extra
    directories).  Returns a new dictionary mapping class names to Class
    instances.
    '''
    # None instead of a shared mutable [] default; any other value is
    # passed through untouched.
    if path is None:
        path = []
    classes = {}
    for key, value in _readmodule(module, path).items():
        if isinstance(value, Class):
            classes[key] = value
    return classes
Guido van Rossuma3b4a331999-06-10 14:39:39 +000087
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of its contents.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    (a Class instance) and each top-level function (a Function
    instance) found in the module, keyed by name.  For a package the
    dictionary also has a '__path__' entry holding the package search
    path.

    PATH is an optional list of directories that is searched before
    sys.path; None (the default) means no extra directories.
    '''
    # None instead of a shared mutable [] default; any other value is
    # passed through untouched.
    if path is None:
        path = []
    return _readmodule(module, path)
Guido van Rossum0ed7aa12002-12-02 14:54:20 +0000101
def _pop_closed_scopes(stack, indent):
    # Drop every (object, indent) entry that a token at column `indent`
    # has closed, i.e. every entry indented at least that far.
    while stack and stack[-1][1] >= indent:
        del stack[-1]


def _resolve_base(name, namespace):
    # Map a superclass name to the object already known for it, or
    # return the name itself (a string) when it cannot be resolved.
    if name in namespace:
        # we know this super class
        return namespace[name]
    parts = name.split('.')
    if len(parts) > 1:
        # super class is of the form module.class: look in the cached
        # module (keyed by the last module component) for the class
        known = _modules.get(parts[-2])
        if known is not None and parts[-1] in known:
            return known[parts[-1]]
    return name


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    MODULE is the (possibly dotted) module name and PATH a list of
    directories.  If INPACKAGE is true, it must be the dotted name of
    the package in which we are searching for a submodule, and then PATH
    must be the package search path; otherwise, we are searching for a
    top-level module, and PATH is combined with sys.path.

    Return the dictionary describing the module: Class and Function
    instances keyed by name, plus a '__path__' entry for packages.
    Results are cached in _modules under the full dotted name.

    Raise ImportError if the module cannot be found, or if the parent of
    a dotted name turns out not to be a package.  Errors raised by the
    tokenizer propagate; the source file is closed in every case.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    contents = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and not inpackage:
        _modules[module] = contents
        return contents

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            # The parent is a plain module: report it the same way as a
            # module that cannot be found, so that callers' ImportError
            # handling (see the 'import' branch below) applies.
            raise ImportError('No package named %s' % package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, description = imp.find_module(module, search_path)
    kind = description[2]
    if kind == imp.PKG_DIRECTORY:
        contents['__path__'] = [fname]
        path = [fname] + path
        f, fname, description = imp.find_module('__init__', [fname])
        kind = description[2]
    # Cache before parsing so that circular imports find this entry.
    _modules[fullmodule] = contents
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return contents

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        try:
            for tokentype, token, start, end, line in g:
                if tokentype == DEDENT:
                    # close nested classes and defs
                    _pop_closed_scopes(stack, start[1])
                elif token == 'def':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    _pop_closed_scopes(stack, thisindent)
                    tokentype, meth_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    if stack:
                        cur_class = stack[-1][0]
                        if isinstance(cur_class, Class):
                            # it's a method
                            cur_class._addmethod(meth_name, lineno)
                        # else it's a nested def
                    else:
                        # it's a function
                        contents[meth_name] = Function(module, meth_name,
                                                       fname, lineno)
                    stack.append((None, thisindent)) # Marker for nested fns
                elif token == 'class':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    _pop_closed_scopes(stack, thisindent)
                    tokentype, class_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    # parse what follows the class name
                    tokentype, token, start, end, line = g.next()
                    inherit = None
                    if token == '(':
                        # there's a list of superclasses
                        names = [] # List of superclasses
                        level = 1
                        pieces = [] # Tokens making up current superclass
                        while True:
                            tokentype, token, start, end, line = g.next()
                            if tokentype in (tokenize.NL, tokenize.COMMENT):
                                # line breaks and comments inside the
                                # parentheses are not part of a name
                                continue
                            if token in (')', ',') and level == 1:
                                # "class C():" and a trailing comma give
                                # no tokens: don't record an empty name
                                if pieces:
                                    names.append(
                                        _resolve_base("".join(pieces),
                                                      contents))
                                pieces = []
                            if token == '(':
                                level += 1
                            elif token == ')':
                                level -= 1
                                if level == 0:
                                    break
                            elif token == ',' and level == 1:
                                pass
                            else:
                                pieces.append(token)
                        inherit = names
                    cur_class = Class(module, class_name, inherit,
                                      fname, lineno)
                    if not stack:
                        contents[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    for mod, _alias in _getnamelist(g):
                        try:
                            # Recursively read the imported module
                            if not inpackage:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # add any classes that were defined in the imported
                    # module to our name space if they were mentioned in
                    # the list
                    for n, n2 in names:
                        if n in d:
                            contents[n2 or n] = d[n]
                        elif n == '*':
                            # don't add names that start with _
                            for key in d:
                                if key[0] != '_':
                                    contents[key] = d[key]
        except StopIteration:
            # The token stream ended in the middle of a statement.
            pass
    finally:
        # Close the source file even if the tokenizer raised.
        f.close()
    return contents
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000271
def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    # g is the token generator; it is advanced past the whole list.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        try:
            # Skip ahead to the next comma or to the end of the line.
            while token != "," and "\n" not in token:
                token = g.next()[1]
        except StopIteration:
            # The input ended without a final newline: keep the names
            # collected so far instead of losing them to the exception.
            break
        if token != ",":
            break
    return names
291
def _getname(g):
    # Helper to read one dotted name from the token generator g.
    # Return a pair (name, token): name is the dotted name (or '*'), or
    # None if the first token read cannot start a name, and token is the
    # text of the last token read (the one following the name).
    kind, text = g.next()[:2]
    if not (kind == NAME or text == '*'):
        return (None, text)
    pieces = [text]
    while True:
        text = g.next()[1]
        if text != '.':
            break
        # a dot must be followed by another name component
        kind, text = g.next()[:2]
        if kind != NAME:
            break
        pieces.append(text)
    return (".".join(pieces), text)
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000310
def _main():
    # Main program for testing: print the classes (with their methods)
    # and the top-level functions of the module or .py file named by the
    # first command line argument, ordered by line number.
    import os
    if len(sys.argv) < 2:
        # No argument: explain what is expected instead of failing with
        # an IndexError traceback.
        sys.stderr.write("usage: %s module-or-file\n"
                         % "".join(sys.argv[:1]))
        sys.exit(2)
    mod = sys.argv[1]
    if os.path.exists(mod):
        # A file name was given: search its directory and strip ".py".
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    found = readmodule_ex(mod, path)
    objs = found.values()
    # Entries without a line number (the '__path__' list) sort first.
    objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
                               getattr(b, 'lineno', 0)))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = obj.methods.items()
            methods.sort(lambda a, b: cmp(a[1], b[1]))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))
336
# Run the test driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _main()