blob: 6674b71ff094eec27d7f009c3460efcb9704668b [file] [log] [blame]
"""Parse a Python module and describe its classes and methods.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of two functions:
        readmodule_ex(module [, path])
        readmodule(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
names that are defined via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair is present for packages: the key
'__path__' has a list as its value which contains the package search
path.  readmodule() is the backwards compatible variant: it returns
only the entries whose values are Class instances.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000041
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000042import sys
43import imp
Guido van Rossum040d7ca2002-08-23 01:36:01 +000044import tokenize # Python tokenizer
Guido van Rossum0a6f9542002-12-03 08:14:35 +000045from token import NAME, DEDENT, NEWLINE
Raymond Hettinger3375fc52003-12-01 20:12:15 +000046from operator import itemgetter
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000047
# Public names of this module.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen: maps a full (dotted) module name to the
# dictionary that _readmodule() built for that module.
_modules = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000051
# Every Python class discovered by the scanner is described by one
# instance of Class.
class Class:
    '''Description of a single Python class.

    Instance variables:
        module -- the module name
        name -- the name of the class
        super -- the list of super classes passed in (a new empty
                 list when None was given)
        methods -- dictionary mapping method names to line numbers
        file -- the file in which the class was defined
        lineno -- the line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module, self.name = module, name
        # A missing list of super classes becomes a fresh empty list.
        if super is not None:
            self.super = super
        else:
            self.super = []
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods.update({name: lineno})
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000067
class Function:
    '''Description of a top-level Python function.

    Instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line number stored for the function
    '''
    def __init__(self, module, name, file, lineno):
        # Plain record: store the four arguments unchanged.
        (self.module, self.name,
         self.file, self.lineno) = module, name, file, lineno
Guido van Rossuma3b4a331999-06-10 14:39:39 +000075
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Call _readmodule() (the worker behind readmodule_ex()) and then
    only keep Class objects from the resulting dictionary.

    MODULE is the name of the module to scan.  PATH is an optional
    list of directories searched before sys.path; None (the default)
    means no extra directories.

    Return a new dictionary mapping names to Class instances.
    '''

    # "path or []" avoids a shared mutable default argument while
    # still handing _readmodule() the list it expects.
    res = {}
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res
Guido van Rossuma3b4a331999-06-10 14:39:39 +000088
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes and functions.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.  For a package
    the dictionary also has the key '__path__'.

    PATH is an optional list of directories searched before
    sys.path; None (the default) means no extra directories.
    '''
    # "path or []" avoids a shared mutable default argument while
    # still handing _readmodule() the list it expects.
    return _readmodule(module, path or [])
Guido van Rossum0ed7aa12002-12-02 14:54:20 +0000102
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    MODULE is the (possibly dotted) name of the module to scan.  If
    INPACKAGE is true, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function
    instances; for a package the key '__path__' maps to a list with
    the package directory.  The dictionary is cached in _modules
    under the full dotted module name.

    An ImportError from imp.find_module() is not caught here, and
    ImportError is also raised when the parent of a dotted name is
    not a package.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    contents = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and not inpackage:
        _modules[module] = contents
        return contents

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            # The parent is a plain module, not a package.  Report it
            # as an import failure (not a KeyError) so that callers
            # which fall back on ImportError keep working.
            raise ImportError('No package named %s' % package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage:
        f, fname, (suff, mode, kind) = imp.find_module(module, path)
    else:
        f, fname, (suff, mode, kind) = imp.find_module(module,
                                                       path + sys.path)
    if kind == imp.PKG_DIRECTORY:
        contents['__path__'] = [fname]
        path = [fname] + path
        f, fname, (suff, mode, kind) = imp.find_module('__init__', [fname])
    # Register the (still empty) dictionary before parsing so that
    # recursive calls for this same module hit the cache.
    _modules[fullmodule] = contents
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return contents

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        try:
            for tokentype, token, start, end, line in g:
                if tokentype == DEDENT:
                    lineno, thisindent = start
                    # close nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                elif token == 'def':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, meth_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    if stack:
                        cur_class = stack[-1][0]
                        if isinstance(cur_class, Class):
                            # it's a method
                            cur_class._addmethod(meth_name, lineno)
                        # else it's a nested def
                    else:
                        # it's a function
                        contents[meth_name] = Function(module, meth_name,
                                                       fname, lineno)
                    stack.append((None, thisindent)) # Marker for nested fns
                elif token == 'class':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    while stack and stack[-1][1] >= thisindent:
                        del stack[-1]
                    tokentype, class_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    # parse what follows the class name
                    tokentype, token, start, end, line = g.next()
                    inherit = None
                    if token == '(':
                        names = [] # List of superclasses
                        # there's a list of superclasses
                        level = 1
                        super_tokens = [] # Tokens making up current superclass
                        while True:
                            tokentype, token, start, end, line = g.next()
                            if token in (')', ',') and level == 1:
                                n = "".join(super_tokens)
                                if n in contents:
                                    # we know this super class
                                    n = contents[n]
                                else:
                                    c = n.split('.')
                                    if len(c) > 1:
                                        # super class is of the form
                                        # module.class: look in module
                                        # for class
                                        m = c[-2]
                                        c = c[-1]
                                        if m in _modules:
                                            d = _modules[m]
                                            if c in d:
                                                n = d[c]
                                # "class C():" and a trailing comma
                                # produce an empty name; that is not
                                # a superclass.
                                if n != "":
                                    names.append(n)
                                super_tokens = []
                            if token == '(':
                                level += 1
                            elif token == ')':
                                level -= 1
                                if level == 0:
                                    break
                            elif token == ',' and level == 1:
                                pass
                            else:
                                super_tokens.append(token)
                        inherit = names
                    cur_class = Class(module, class_name, inherit,
                                      fname, lineno)
                    if not stack:
                        contents[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    modules = _getnamelist(g)
                    for mod, mod2 in modules:
                        try:
                            # Recursively read the imported module
                            if not inpackage:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    # not in the package: try it as a
                                    # top-level module
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported
                            # module, too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported
                        # module, too bad -- don't die here.
                        continue
                    # add any classes that were defined in the imported
                    # module to our name space if they were mentioned
                    # in the list
                    for n, n2 in names:
                        if n in d:
                            contents[n2 or n] = d[n]
                        elif n == '*':
                            # don't add names that start with _
                            for key in d:
                                if key[0] != '_':
                                    contents[key] = d[key]
        except StopIteration:
            # g.next() ran off the end of the file in the middle of
            # a statement; keep what has been collected so far.
            pass
    finally:
        # Close the source file even when tokenizing fails.
        f.close()

    return contents
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000272
def _getnamelist(g):
    '''Read a comma-separated list of dotted names with 'as' clauses.

    G is the token generator.  Return a list of pairs (name, name2)
    where name2 is the 'as' name, or None if there is no 'as'
    clause.
    '''
    pairs = []
    name, token = _getname(g)
    while name:
        alias = None
        if token == 'as':
            alias, token = _getname(g)
        pairs.append((name, alias))
        # Skip ahead to the next comma or to the end of the line.
        while not (token == "," or "\n" in token):
            token = g.next()[1]
        if token != ",":
            break
        name, token = _getname(g)
    return pairs
292
293def _getname(g):
294 # Helper to get a dotted name, return a pair (name, token) where
295 # name is the dotted name, or None if there was no dotted name,
296 # and token is the next input token.
297 parts = []
298 tokentype, token, start, end, line = g.next()
299 if tokentype != NAME and token != '*':
300 return (None, token)
301 parts.append(token)
302 while True:
303 tokentype, token, start, end, line = g.next()
304 if token != '.':
305 break
306 tokentype, token, start, end, line = g.next()
307 if tokentype != NAME:
308 break
309 parts.append(token)
310 return (".".join(parts), token)
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000311
def _main():
    '''Main program for testing.

    sys.argv[1] is either a module name or the path of a file; for
    an existing file its directory is used as the search path and a
    trailing ".py" is dropped from the name.  Print every class
    (with its methods) and every top-level function found, ordered
    by line number.
    '''
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    found = readmodule_ex(mod, path)
    # Entries without a lineno (such as the '__path__' list of a
    # package) sort first.
    objs = sorted(found.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            # the sorted() builtin replaces list.sorted
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))

if __name__ == "__main__":
    _main()