blob: f0c8381946c614a065a2fb224c49c606b80b0d5f [file] [log] [blame]
"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched. If present,
path is prepended to the system search path sys.path. The return value
is a dictionary. The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct). The values are
instances of classes Class and Function. One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

Classes and Functions have a common superclass: _Object. Every instance
has the following attributes:
    module -- name of the module;
    name -- name of the object;
    file -- file in which the object is defined;
    lineno -- line in the file where the object's definition starts;
    parent -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object,
plus the following:
    is_async -- if a function is defined with an 'async' prefix

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class. Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000042
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030043import ast
44import copy
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000045import sys
Eric Snow6029e082014-01-25 15:32:46 -070046import importlib.util
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000047
# Public names exported by a star-import of this module.
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
Skip Montanaroc62c81e2001-02-12 02:00:42 +000049
# Cache of modules we've seen, keyed by full dotted module name.  Each
# value is the dict that _readmodule returns for that module.
_modules = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000051
csabella246ff3b2017-07-03 21:31:25 -040052
53class _Object:
Xtreak0d702272019-06-03 04:42:33 +053054 "Information about Python class or function."
csabella246ff3b2017-07-03 21:31:25 -040055 def __init__(self, module, name, file, lineno, parent):
Tim Peters2344fae2001-01-15 00:50:52 +000056 self.module = module
57 self.name = name
Tim Peters2344fae2001-01-15 00:50:52 +000058 self.file = file
59 self.lineno = lineno
csabella246ff3b2017-07-03 21:31:25 -040060 self.parent = parent
61 self.children = {}
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030062 if parent is not None:
63 parent.children[name] = self
csabella246ff3b2017-07-03 21:31:25 -040064
class Function(_Object):
    """Descriptor for a Python function, including methods.

    Adds one attribute to those of _Object:
        is_async -- the is_async flag given by the caller (whether the
            function is defined with an 'async' prefix).
    """

    def __init__(
        self, module, name, file, lineno, parent=None, is_async=False
    ):
        super().__init__(module, name, file, lineno, parent)
        self.is_async = is_async
        # A function whose parent is a Class is one of its methods, so
        # record its starting line in the class's method table as well.
        nested_in_class = isinstance(parent, Class)
        if nested_in_class:
            parent.methods[name] = lineno
csabella246ff3b2017-07-03 21:31:25 -040072
class Class(_Object):
    """Descriptor for a Python class.

    Adds to the attributes of _Object:
        super -- list of super classes as supplied by the caller;
        methods -- mapping of method names to line numbers; it starts
            empty and is filled in as Function objects are created
            with this class as their parent.
    """

    def __init__(self, module, name, super_, file, lineno, parent=None):
        super().__init__(module, name, file, lineno, parent)
        # Any falsy value (None, an empty sequence) becomes a new list.
        self.super = super_ if super_ else []
        self.methods = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000079
# These two functions are used in these tests:
# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, is_async=False):
    """Return a new Function nested within ob.

    The function takes its module and file from ob and is created with
    ob as its parent.
    """
    return Function(
        module=ob.module,
        name=func_name,
        file=ob.file,
        lineno=lineno,
        parent=ob,
        is_async=is_async,
    )
csabella246ff3b2017-07-03 21:31:25 -040085
def _nest_class(ob, class_name, lineno, super=None):
    """Return a new Class nested within ob.

    The class takes its module and file from ob and is created with ob
    as its parent; super is passed on as its list of super classes.
    """
    return Class(
        module=ob.module,
        name=class_name,
        super_=super,
        file=ob.file,
        lineno=lineno,
        parent=ob,
    )
Guido van Rossuma3b4a331999-06-10 14:39:39 +000089
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added: the
    result is a new dict holding only those entries of the module's
    description whose value is a Class.
    """
    described = _readmodule(module, path or [])
    return {
        name: entry
        for name, entry in described.items()
        if isinstance(entry, Class)
    }
Guido van Rossuma3b4a331999-06-10 14:39:39 +0000101
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    # A missing or empty path means "no extra directories".
    extra_dirs = path if path else []
    return _readmodule(module, extra_dirs)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000110
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    Arguments:
        module -- module name, possibly dotted; relative to inpackage
            when that is given.
        path -- sequence of directories to search.  For a top-level
            module it is searched ahead of sys.path; when inpackage is
            given it must be the package search path.
        inpackage -- dotted name of the package in which we are
            searching for a submodule, or None when searching for a
            top-level module.

    Return the dict describing the module.  The same dict is stored in
    the _modules cache under the full module name and returned as-is by
    later calls.  It is empty for built-in modules, and holds at most a
    '__path__' entry when no source is available.

    Raise ImportError when a dotted name passes through something that
    is not a package, and ModuleNotFoundError when no spec is found.
    Errors raised while parsing the source propagate to the caller.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = f"{inpackage}.{module}"
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the containing package
    # first, then look for the last component on that package's path.
    if '.' in module:
        package, _, submodule = module.rpartition('.')
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        # list() lets callers pass any sequence (e.g. a tuple) of
        # directories, not only a list.
        search_path = list(path) + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(
            f"no module named {fullmodule!r}", name=fullmodule
        )
    # Register the dict before the source is read, so that a nested
    # request for this same module (made while following its imports)
    # is answered from the cache.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    else:
        if source is None:
            return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
Victor Stinner5c13aa12016-03-17 09:06:41 +0100173
csabella246ff3b2017-07-03 21:31:25 -0400174
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300175class _ModuleBrowser(ast.NodeVisitor):
176 def __init__(self, module, path, file, tree, inpackage):
177 self.path = path
178 self.tree = tree
179 self.file = file
180 self.module = module
181 self.inpackage = inpackage
182 self.stack = []
csabella246ff3b2017-07-03 21:31:25 -0400183
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300184 def visit_ClassDef(self, node):
185 bases = []
186 for base in node.bases:
187 name = ast.unparse(base)
188 if name in self.tree:
189 # We know this super class.
190 bases.append(self.tree[name])
191 elif len(names := name.split(".")) > 1:
192 # Super class form is module.class:
193 # look in module for class.
194 *_, module, class_ = names
195 if module in _modules:
196 bases.append(_modules[module].get(class_, name))
197 else:
198 bases.append(name)
csabella246ff3b2017-07-03 21:31:25 -0400199
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300200 parent = self.stack[-1] if self.stack else None
201 class_ = Class(
202 self.module, node.name, bases, self.file, node.lineno, parent
203 )
204 if parent is None:
205 self.tree[node.name] = class_
206 self.stack.append(class_)
207 self.generic_visit(node)
208 self.stack.pop()
Brett Cannonee78a2b2012-05-12 17:43:17 -0400209
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300210 def visit_FunctionDef(self, node, *, is_async=False):
211 parent = self.stack[-1] if self.stack else None
212 function = Function(
213 self.module, node.name, self.file, node.lineno, parent, is_async
214 )
215 if parent is None:
216 self.tree[node.name] = function
217 self.stack.append(function)
218 self.generic_visit(node)
219 self.stack.pop()
Guido van Rossumad380551999-06-07 15:25:18 +0000220
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300221 def visit_AsyncFunctionDef(self, node):
222 self.visit_FunctionDef(node, is_async=True)
223
224 def visit_Import(self, node):
225 if node.col_offset != 0:
226 return
227
228 for module in node.names:
229 try:
Tim Peters2344fae2001-01-15 00:50:52 +0000230 try:
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300231 _readmodule(module.name, self.path, self.inpackage)
232 except ImportError:
233 _readmodule(module.name, [])
234 except (ImportError, SyntaxError):
235 # If we can't find or parse the imported module,
236 # too bad -- don't die here.
237 continue
Guido van Rossumad380551999-06-07 15:25:18 +0000238
Batuhan Taskayafa476fe2020-11-11 10:14:12 +0300239 def visit_ImportFrom(self, node):
240 if node.col_offset != 0:
241 return
242 try:
243 module = "." * node.level
244 if node.module:
245 module += node.module
246 module = _readmodule(module, self.path, self.inpackage)
247 except (ImportError, SyntaxError):
248 return
249
250 for name in node.names:
251 if name.name in module:
252 self.tree[name.asname or name.name] = module[name.name]
253 elif name.name == "*":
254 for import_name, import_value in module.items():
255 if import_name.startswith("_"):
256 continue
257 self.tree[import_name] = import_value
csabella246ff3b2017-07-03 21:31:25 -0400258
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000259
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and record its definitions in tree.

    A _ModuleBrowser is run over the parsed source; the dict it filled
    in (the tree passed here) is returned.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    parsed = ast.parse(source)
    browser.visit(parsed)
    return browser.tree
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000264
csabella246ff3b2017-07-03 21:31:25 -0400265
def _main():
    """Print module output (default this file) for quick visual check.

    The module is taken from sys.argv[1].  If that names an existing
    path, its directory is used as the search path and its base name
    (minus a trailing '.py') as the module name; otherwise it is used
    as a module name with no extra search path.

    One line is printed per class and function, in line-number order,
    with nested definitions indented under their parent.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def by_lineno(obj):
        return obj.lineno

    indent_level = 2
    # Skip values that are not descriptors (the '__path__' entry).
    top_level = [obj for obj in tree.values() if isinstance(obj, _Object)]
    # Work stack of (object, indent) pairs.  Entries are pushed in
    # reverse line order so that pop() yields them in source order, and
    # an object's children are pushed right after it is popped so they
    # are printed before its later siblings.
    pending = [
        (obj, 0) for obj in sorted(top_level, key=by_lineno, reverse=True)
    ]
    while pending:
        obj, indent = pending.pop()
        nested = sorted(obj.children.values(), key=by_lineno, reverse=True)
        pending.extend((child, indent + indent_level) for child in nested)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * indent, obj.name, obj.lineno))
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000303
# Print the quick visual check when this file is run as a script.
if __name__ == "__main__":
    _main()