blob: 4d40b87f164ecc33baeb4db755a513318f4f7c06 [file] [log] [blame]
Guido van Rossum0a6f9542002-12-03 08:14:35 +00001"""Parse a Python module and describe its classes and methods.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00002
Guido van Rossum0a6f9542002-12-03 08:14:35 +00003Parse enough of a Python file to recognize imports and class and
4method definitions, and to find out the superclasses of a class.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00005
6The interface consists of a single function:
Guido van Rossum0a6f9542002-12-03 08:14:35 +00007 readmodule_ex(module [, path])
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
10path is prepended to the system search path sys.path. The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
those that are brought in via the from XXX import YYY construct).  The
values are instances of the classes Class and Function defined here.
One special key/value pair
15is present for packages: the key '__path__' has a list as its value
16which contains the package search path.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000017
18A class is described by the class Class in this module. Instances
19of this class have the following instance variables:
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000020 module -- the module name
Tim Peters2344fae2001-01-15 00:50:52 +000021 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000026The dictionary of methods uses the method names as keys and the line
27numbers on which the method was defined as values.
28If the name of a super class is not recognized, the corresponding
29entry in the list of super classes is not a class instance but a
30string giving the name of the super class. Since import statements
31are recognized and imported modules are scanned as well, this
32shouldn't happen often.
33
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000034A function is described by the class Function in this module.
35Instances of this class have the following instance variables:
36 module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000040"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000041
Brett Cannonee78a2b2012-05-12 17:43:17 -040042import io
43import os
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000044import sys
Eric Snow6029e082014-01-25 15:32:46 -070045import importlib.util
Christian Heimes81ee3ef2008-05-04 22:42:01 +000046import tokenize
47from token import NAME, DEDENT, OP
Raymond Hettinger3375fc52003-12-01 20:12:15 +000048from operator import itemgetter
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000049
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of modules already scanned: maps a full dotted module name to the
# dictionary that _readmodule() built (or is still building) for it.
_modules = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000053
54# each Python class is represented by an instance of this class
class Class:
    '''Describe one Python class found while scanning a module.

    Attributes: module, name, super (list of base classes), methods
    (dict mapping method name to line number), file and lineno.
    '''

    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        # None stands for "no base list"; each instance then gets its
        # own fresh empty list.
        self.super = [] if super is None else super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record (or overwrite) the line number of method NAME.'''
        self.methods[name] = lineno
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000069
class Function:
    '''Describe one top-level Python function found while scanning a module.

    Attributes: module, name, file and lineno.
    '''

    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
Guido van Rossuma3b4a331999-06-10 14:39:39 +000077
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Scan MODULE exactly as readmodule_ex() does, but return a new
    dictionary holding only the entries whose value is a Class
    instance.'''
    everything = _readmodule(module, path or [])
    return {name: obj for name, obj in everything.items()
            if isinstance(obj, Class)}
Guido van Rossuma3b4a331999-06-10 14:39:39 +000089
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module, and return the dictionary built by _readmodule() for it.
    A missing or empty PATH means "search sys.path only".
    '''
    search_dirs = path or []
    return _readmodule(module, search_dirs)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000098
def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function instances.
    Classes and functions defined at the top level of the module get
    an entry, as do names pulled in by top-level "from ... import"
    statements; methods are recorded on their Class.  For a package the
    key '__path__' holds the package search path.  The dictionary is
    cached in _modules under the full dotted module name and the same
    object is returned by later calls.

    Raise ImportError if the module cannot be located, or if a dotted
    name refers to a parent that is not a package.  Failures while
    reading modules that the scanned source imports are ignored.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the mapping for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first and
    # then look for the last component on the parent's search path.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    # XXX This will change once issue19944 lands.
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        # Report a missing module as ImportError *before* touching the
        # cache; otherwise the lookup would die with AttributeError and
        # leave a bogus empty entry behind for later calls.
        raise ImportError('No module named {!r}'.format(fullmodule),
                          name=fullmodule)
    # Cache the (still empty) mapping right away so that circular
    # imports find it instead of recursing forever.
    _modules[fullmodule] = tree
    # is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # not Python source, can't do anything with this module
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)

    f = io.StringIO(source)

    stack = []  # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                thisindent = start[1]
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))  # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    names = []  # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    base_tokens = []  # Tokens making up current superclass
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            base = "".join(base_tokens)
                            # Skip empty slots ("class A():" or a
                            # trailing comma) and keyword arguments
                            # such as metaclass=...: neither names a
                            # superclass.
                            if base and '=' not in base_tokens:
                                if base in tree:
                                    # we know this super class
                                    base = tree[base]
                                else:
                                    parts = base.split('.')
                                    if len(parts) > 1:
                                        # super class is of the form
                                        # module.class: look in module
                                        # for class
                                        modname = parts[-2]
                                        clsname = parts[-1]
                                        if modname in _modules:
                                            members = _modules[modname]
                                            if clsname in members:
                                                base = members[clsname]
                                names.append(base)
                            base_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for type name
                        elif tokentype in (NAME, OP) and level == 1:
                            base_tokens.append(token)
                        # expressions in the base list are not supported
                    inherit = names
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  (KeyboardInterrupt
                        # and SystemExit are deliberately not swallowed.)
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    imported = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for name, asname in names:
                    if name in imported:
                        tree[asname or name] = imported[name]
                    elif name == '*':
                        # don't add names that start with _
                        for key in imported:
                            if key[0] != '_':
                                tree[key] = imported[key]
    except StopIteration:
        # The helpers pull tokens with next(); running off the end of
        # the token stream simply ends the scan.
        pass
    finally:
        f.close()

    return tree
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000288
def _getnamelist(g):
    # Read a comma-separated list of dotted names, each optionally
    # followed by an 'as' clause, from the token generator g.
    # Return a list of (name, asname) pairs; asname is None when the
    # entry has no 'as' clause.
    pairs = []
    while True:
        dotted, tok = _getname(g)
        if not dotted:
            return pairs
        alias = None
        if tok == 'as':
            alias, tok = _getname(g)
        pairs.append((dotted, alias))
        # Skip ahead to the next comma or to a token containing a newline.
        while not (tok == "," or "\n" in tok):
            tok = next(g)[1]
        if tok != ",":
            return pairs
308
def _getname(g):
    # Read one dotted name from the token generator g.
    # Return a pair (name, token): name is the dotted name (or '*'),
    # or None if the first token cannot start a name; token is the
    # token string at which reading stopped.
    tok_type, tok = next(g)[0:2]
    if tok_type != NAME and tok != '*':
        return (None, tok)
    pieces = [tok]
    while True:
        tok = next(g)[1]
        if tok != '.':
            break
        # A dot must be followed by another NAME to extend the name.
        tok_type, tok = next(g)[0:2]
        if tok_type != NAME:
            break
        pieces.append(tok)
    return (".".join(pieces), tok)
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000327
def _main():
    '''Main program for testing.

    Take a module name or the path of an existing file from
    sys.argv[1], scan it with readmodule_ex(), and print the classes
    (with their methods) and functions found, ordered by line number.
    Exit with a usage message if no argument was given.
    '''
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "prog"
        sys.exit("usage: %s <module name or path to .py file>" % prog)
    mod = sys.argv[1]
    if os.path.exists(mod):
        # A file path: search its directory and strip the extension to
        # obtain the module name.
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Entries without a lineno (e.g. the '__path__' list) sort first and
    # are skipped by the isinstance checks below.
    objs = sorted(tree.values(), key=lambda a: getattr(a, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class", obj.name, obj.super, obj.lineno)
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def", name, lineno)
        elif isinstance(obj, Function):
            print("def", obj.name, obj.lineno)


if __name__ == "__main__":
    _main()