blob: 37f86995d6ce00013643f1b4a8aa387cebaeae72 [file] [log] [blame]
csabella246ff3b2017-07-03 21:31:25 -04001"""Parse a Python module and describe its classes and functions.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00002
Guido van Rossum0a6f9542002-12-03 08:14:35 +00003Parse enough of a Python file to recognize imports and class and
csabella246ff3b2017-07-03 21:31:25 -04004function definitions, and to find out the superclasses of a class.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00005
6The interface consists of a single function:
csabella246ff3b2017-07-03 21:31:25 -04007 readmodule_ex(module, path=None)
Guido van Rossum0a6f9542002-12-03 08:14:35 +00008where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched. If present,
csabella246ff3b2017-07-03 21:31:25 -040010path is prepended to the system search path sys.path. The return value
11is a dictionary. The keys of the dictionary are the names of the
12classes and functions defined in the module (including classes that are
13defined via the from XXX import YYY construct). The values are
14instances of classes Class and Function. One special key/value pair is
15present for packages: the key '__path__' has a list as its value which
16contains the package search path.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000017
csabella246ff3b2017-07-03 21:31:25 -040018Classes and Functions have a common superclass: _Object. Every instance
19has the following attributes:
20 module -- name of the module;
21 name -- name of the object;
22 file -- file in which the object is defined;
23 lineno -- line in the file where the object's definition starts;
Aviral Srivastava000cde52021-02-01 09:38:44 -080024 end_lineno -- line in the file where the object's definition ends;
csabella246ff3b2017-07-03 21:31:25 -040025 parent -- parent of this object, if any;
26 children -- nested objects contained in this object.
27The 'children' attribute is a dictionary mapping names to objects.
28
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030029Instances of Function describe functions with the attributes from _Object,
30plus the following:
31 is_async -- if a function is defined with an 'async' prefix
csabella246ff3b2017-07-03 21:31:25 -040032
33Instances of Class describe classes with the attributes from _Object,
34plus the following:
35 super -- list of super classes (Class instances if possible);
36 methods -- mapping of method names to beginning line numbers.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000037If the name of a super class is not recognized, the corresponding
38entry in the list of super classes is not a class instance but a
39string giving the name of the super class. Since import statements
40are recognized and imported modules are scanned as well, this
41shouldn't happen often.
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000042"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000043
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030044import ast
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000045import sys
Eric Snow6029e082014-01-25 15:32:46 -070046import importlib.util
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000047
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000048__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
Skip Montanaroc62c81e2001-02-12 02:00:42 +000049
csabella246ff3b2017-07-03 21:31:25 -040050_modules = {} # Initialize cache of modules we've seen.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000051
csabella246ff3b2017-07-03 21:31:25 -040052
53class _Object:
Xtreak0d702272019-06-03 04:42:33 +053054 "Information about Python class or function."
Aviral Srivastava000cde52021-02-01 09:38:44 -080055 def __init__(self, module, name, file, lineno, end_lineno, parent):
Tim Peters2344fae2001-01-15 00:50:52 +000056 self.module = module
57 self.name = name
Tim Peters2344fae2001-01-15 00:50:52 +000058 self.file = file
59 self.lineno = lineno
Aviral Srivastava000cde52021-02-01 09:38:44 -080060 self.end_lineno = end_lineno
csabella246ff3b2017-07-03 21:31:25 -040061 self.parent = parent
62 self.children = {}
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030063 if parent is not None:
64 parent.children[name] = self
csabella246ff3b2017-07-03 21:31:25 -040065
Aviral Srivastava000cde52021-02-01 09:38:44 -080066
67# Odd Function and Class signatures are for back-compatibility.
class Function(_Object):
    """Describe a Python function; methods are Functions whose parent is a Class.

    Adds the is_async attribute to those provided by _Object.
    """

    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.is_async = is_async
        if not isinstance(parent, Class):
            return
        # A function nested directly in a class is also recorded in that
        # class's name -> starting line mapping.
        parent.methods[name] = lineno
csabella246ff3b2017-07-03 21:31:25 -040076
Aviral Srivastava000cde52021-02-01 09:38:44 -080077
class Class(_Object):
    """Describe a Python class.

    Adds two attributes to those provided by _Object:
      super   -- the list passed as super_ (an empty list if it was falsy);
      methods -- dict mapping method names to their starting line numbers,
                 filled in as Function objects are nested in this class.
    """

    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.super = super_ if super_ else []
        self.methods = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000085
Aviral Srivastava000cde52021-02-01 09:38:44 -080086
# These two helper functions are used by the tests in
# Lib/test/test_pyclbr.py and Lib/idlelib/idle_test/test_browser.py.
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    """Create a Function nested in ob and return it.

    The new object takes its module and file from ob.
    """
    nested = Function(ob.module, func_name, ob.file, lineno,
                      ob, is_async, end_lineno=end_lineno)
    return nested
csabella246ff3b2017-07-03 21:31:25 -040093
def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    """Create a Class nested in ob and return it.

    The new object takes its module and file from ob; super is passed
    through as the list of superclasses.
    """
    nested = Class(ob.module, class_name, super, ob.file, lineno,
                   ob, end_lineno=end_lineno)
    return nested
98
Guido van Rossuma3b4a331999-06-10 14:39:39 +000099
def readmodule(module, path=None):
    """Return a dict of the top-level Class objects found in module.

    This is the original interface, from before Functions were added:
    every entry that is not a Class instance is filtered out.
    """
    everything = _readmodule(module, path or [])
    return {name: ob for name, ob in everything.items()
            if isinstance(ob, Class)}
Guido van Rossuma3b4a331999-06-10 14:39:39 +0000111
def readmodule_ex(module, path=None):
    """Return a dictionary describing all functions and classes in module.

    The module is searched for in path + sys.path.  Its source is read
    rather than imported (and executed); imported superclasses are
    included where they can be found.
    """
    search_dirs = path or []
    return _readmodule(module, search_dirs)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000120
Aviral Srivastava000cde52021-02-01 09:38:44 -0800121
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then path must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the dict describing the module's contents.  Results are cached
    in _modules under the full dotted module name.

    Raise ImportError if a dotted name's parent is not a package, and
    ModuleNotFoundError if no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    fullmodule = module if inpackage is None else f"{inpackage}.{module}"

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first, then
    # look the last component up on the parent's package search path.
    package, dot, submodule = module.rpartition('.')
    if dot:
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    search_path = path if inpackage is not None else path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)

    # Cache the (still empty) tree before parsing, so that a recursive
    # request for this module gets the cached dict back.
    _modules[fullmodule] = tree

    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations

    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
Victor Stinner5c13aa12016-03-17 09:06:41 +0100184
csabella246ff3b2017-07-03 21:31:25 -0400185
class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that records the classes and functions of one module.

    Top-level definitions, and names brought in by 'from ... import'
    statements that start in column 0, are stored in tree.  Nested
    definitions are attached to their enclosing Class or Function
    through that object's children mapping.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        # Class/Function objects enclosing the node currently being
        # visited, innermost last.
        self.stack = []

    def visit_ClassDef(self, node):
        """Record a class definition and then visit its body."""
        bases = []
        for base in node.bases:
            name = ast.unparse(base)
            if name in self.tree:
                # We know this super class.
                bases.append(self.tree[name])
                continue
            # An unrecognized super class is always represented by its
            # name, so that no base is ever dropped from the list.
            resolved = name
            names = name.split(".")
            if len(names) > 1:
                # Super class form is module.class:
                # look in module for class.
                *_, module, class_ = names
                if module in _modules:
                    resolved = _modules[module].get(class_, name)
            bases.append(resolved)

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        """Record a function definition and then visit its body."""
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        """Record an 'async def' like a plain function, flagged as async."""
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        """Read the modules named by an import that starts in column 0."""
        if node.col_offset != 0:
            return

        for module in node.names:
            try:
                try:
                    _readmodule(module.name, self.path, self.inpackage)
                except ImportError:
                    # Retry as a top-level module with no extra path.
                    _readmodule(module.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        """Copy names imported by a 'from' import that starts in column 0."""
        if node.col_offset != 0:
            return
        try:
            # Relative imports keep their leading dots in the module name.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                # 'import *': copy every entry whose name does not start
                # with an underscore.
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value
csabella246ff3b2017-07-03 21:31:25 -0400267
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000268
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and return tree filled in by a _ModuleBrowser.

    The remaining arguments are handed to the browser unchanged.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    syntax_tree = ast.parse(source)
    browser.visit(syntax_tree)
    return browser.tree
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000273
csabella246ff3b2017-07-03 21:31:25 -0400274
def _main():
    """Print module output (default this file) for quick visual check.

    The module is taken from sys.argv[1]; it may be a module name or the
    path of an existing file.  Each class and function is printed on its
    own line, indented according to its nesting depth.
    """
    import os

    # Only a missing argument should fall back to this file; a bare
    # 'except:' here would also swallow KeyboardInterrupt and SystemExit.
    try:
        mod = sys.argv[1]
    except IndexError:
        mod = __file__

    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(a):
        # Values without a line number (the '__path__' list) sort first.
        return getattr(a, 'lineno', 0)

    # Sorted in reverse so that pop() yields objects in source order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            # Push the children so they are printed right after their
            # parent, one indentation level deeper.
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000312
313if __name__ == "__main__":
314 _main()