blob: 65e9fbedfdd1117b11e98cc7ed71b2e9240cb50f [file] [log] [blame]
Guido van Rossum0a6f9542002-12-03 08:14:35 +00001"""Parse a Python module and describe its classes and methods.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00002
Guido van Rossum0a6f9542002-12-03 08:14:35 +00003Parse enough of a Python file to recognize imports and class and
4method definitions, and to find out the superclasses of a class.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00005
6The interface consists of a single function:
Guido van Rossum0a6f9542002-12-03 08:14:35 +00007 readmodule_ex(module [, path])
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
10path is prepended to the system search path sys.path. The return
11value is a dictionary. The keys of the dictionary are the names of
12the classes defined in the module (including classes that are defined
13via the from XXX import YYY construct). The values are class
14instances of the class Class defined here. One special key/value pair
15is present for packages: the key '__path__' has a list as its value
16which contains the package search path.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000017
18A class is described by the class Class in this module. Instances
19of this class have the following instance variables:
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000020 module -- the module name
Tim Peters2344fae2001-01-15 00:50:52 +000021 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000026The dictionary of methods uses the method names as keys and the line
27numbers on which the method was defined as values.
28If the name of a super class is not recognized, the corresponding
29entry in the list of super classes is not a class instance but a
30string giving the name of the super class. Since import statements
31are recognized and imported modules are scanned as well, this
32shouldn't happen often.
33
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000034A function is described by the class Function in this module.
35Instances of this class have the following instance variables:
36 module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000040"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000041
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000042import sys
43import imp
Christian Heimes81ee3ef2008-05-04 22:42:01 +000044import tokenize
45from token import NAME, DEDENT, OP
Raymond Hettinger3375fc52003-12-01 20:12:15 +000046from operator import itemgetter
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000047
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000048__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
Skip Montanaroc62c81e2001-02-12 02:00:42 +000049
Guido van Rossumad380551999-06-07 15:25:18 +000050_modules = {} # cache of modules we've seen
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000051
52# each Python class is represented by an instance of this class
class Class:
    '''Description of one Python class found while scanning a module.

    Instance variables:
        module  -- name of the module the class was found in
        name    -- name of the class
        super   -- list of superclasses (empty list when None was given)
        methods -- dictionary mapping method names to line numbers
        file    -- file in which the class was defined
        lineno  -- line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        self.module, self.name = module, name
        # A missing superclass list is normalised to a fresh empty list.
        self.super = [] if super is None else super
        self.methods = {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000067
class Function:
    '''Description of one top-level Python function.

    Instance variables:
        module -- name of the module the function was found in
        name   -- name of the function
        file   -- file in which the function was defined
        lineno -- line on which the def statement occurred
    '''
    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
Guido van Rossuma3b4a331999-06-10 14:39:39 +000075
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE the same way readmodule_ex() does, but return a new
    dictionary holding only the entries whose value is a Class
    instance.'''

    scanned = _readmodule(module, path or [])
    return {name: obj for name, obj in scanned.items()
            if isinstance(obj, Class)}
Guido van Rossuma3b4a331999-06-10 14:39:39 +000087
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    MODULE is looked up in PATH followed by sys.path; a missing or
    empty PATH means sys.path only.  The result is whatever
    _readmodule() returns for that module.
    '''
    extra_dirs = path or []
    return _readmodule(module, extra_dirs)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000096
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function instances
    for the top-level classes and functions that were found, plus the
    names pulled in by top-level "from ... import" statements.  For a
    package the dictionary also has a '__path__' entry holding the
    package search path.  Results are cached in _modules under the full
    dotted name, so repeated calls return the same dictionary.

    Raise ImportError if MODULE is a dotted name whose parent is not a
    package; errors raised by imp.find_module() propagate as well.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        f, fname, (_s, _m, ty) = imp.find_module(module, path)
    else:
        f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
    if ty == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Register the (still empty) dictionary before parsing: a recursive
    # _readmodule() call for this same module then gets it from the
    # cache instead of parsing the file again.
    _modules[fullmodule] = tree
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        # (find_module may not have returned a file object at all)
        if f is not None:
            f.close()
        return tree

    stack = [] # stack of (class, indent) pairs

    try:
        g = tokenize.generate_tokens(f.readline)
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent)) # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    # there's a list of superclasses
                    bases = [] # List of superclasses
                    level = 1
                    base_tokens = [] # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(base_tokens)
                            if n in tree:
                                # we know this super class
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            bases.append(n)
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = bases
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  KeyboardInterrupt
                        # and SystemExit are deliberately not swallowed.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                imported = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.  KeyboardInterrupt
                    # and SystemExit are deliberately not swallowed.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in imported:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for key in d:
                            if key[0] != '_':
                                tree[key] = d[key]
    except StopIteration:
        # The helpers pull tokens with next(); running out of tokens in
        # the middle of a statement simply ends the scan.
        pass
    finally:
        # Close the source file even when tokenizing fails.
        f.close()
    return tree
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000278
def _getnamelist(g):
    # Helper: read a comma-separated run of dotted names, each with an
    # optional 'as' clause, from token generator g.  Return a list of
    # (name, asname) pairs; asname is None when there was no 'as'.
    pairs = []
    more = True
    while more:
        dotted, tok = _getname(g)
        if not dotted:
            return pairs
        alias = None
        if tok == 'as':
            alias, tok = _getname(g)
        pairs.append((dotted, alias))
        # Skip ahead to the separating comma or to a token containing
        # a newline, whichever comes first.
        while not (tok == "," or "\n" in tok):
            tok = next(g)[1]
        more = tok == ","
    return pairs
298
def _getname(g):
    # Helper: read one dotted name from token generator g.  Return a
    # pair (name, token): name is the dotted name (or None if the first
    # token was neither a NAME nor '*'), token is the text of the last
    # token that was read.
    kind, tok = next(g)[:2]
    if not (kind == NAME or tok == '*'):
        return (None, tok)
    pieces = [tok]
    while True:
        tok = next(g)[1]
        if tok != '.':
            break
        kind, tok = next(g)[:2]
        if kind != NAME:
            break
        pieces.append(tok)
    return (".".join(pieces), tok)
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000317
def _main():
    # Main program for testing: scan the module or file named by the
    # first command-line argument and print what was found, ordered by
    # line number.
    import os
    target = sys.argv[1]
    search = []
    if os.path.exists(target):
        # An existing path was given: search its directory and turn
        # the file name into a module name.
        search = [os.path.dirname(target)]
        target = os.path.basename(target)
        if target.lower().endswith(".py"):
            target = target[:-3]
    found = readmodule_ex(target, search)
    # Entries without a lineno attribute sort first (key 0).
    entries = sorted(found.values(), key=lambda a: getattr(a, 'lineno', 0))
    for obj in entries:
        if isinstance(obj, Class):
            print("class", obj.name, obj.super, obj.lineno)
            by_line = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in by_line:
                if name != "__path__":
                    print(" def", name, lineno)
        elif isinstance(obj, Function):
            print("def", obj.name, obj.lineno)


if __name__ == "__main__":
    _main()