blob: ebcc23c29da215b983bf16a126308901f1970b53 [file] [log] [blame]
"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The main entry point is the function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories in which to search for the module. If present,
path is prepended to the system search path sys.path. The return value
is a dictionary. The keys of the dictionary are the names of the
classes and functions defined in the module (including names that are
brought in via the from XXX import YYY construct). The values are
instances of classes Class and Function. One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

The older function readmodule(module, path=None) takes the same
arguments but returns only the entries that are Class instances.

Classes and Functions have a common superclass: _Object. Every instance
has the following attributes:
    module     -- name of the module;
    name       -- name of the object;
    file       -- file in which the object is defined;
    lineno     -- line in the file where the object's definition starts;
    end_lineno -- line in the file where the object's definition ends;
    parent     -- parent of this object, if any;
    children   -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object,
plus the following:
    is_async -- true if the function is defined with an 'async' prefix.

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class. Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000043
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030044import ast
45import copy
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000046import sys
Eric Snow6029e082014-01-25 15:32:46 -070047import importlib.util
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000048
Guido van Rossum0ed7aa12002-12-02 14:54:20 +000049__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
Skip Montanaroc62c81e2001-02-12 02:00:42 +000050
csabella246ff3b2017-07-03 21:31:25 -040051_modules = {} # Initialize cache of modules we've seen.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000052
csabella246ff3b2017-07-03 21:31:25 -040053
class _Object:
    """Information about a Python class or function.

    Stores the constructor arguments as same-named attributes and starts
    with an empty ``children`` dict.  When a parent is supplied, the new
    object registers itself in ``parent.children`` under its own name.
    """

    def __init__(self, module, name, file, lineno, end_lineno, parent):
        self.module, self.name, self.file = module, name, file
        self.lineno, self.end_lineno = lineno, end_lineno
        self.parent = parent
        # Nested definitions add themselves here, keyed by name.
        self.children = {}
        if parent is None:
            return
        parent.children[name] = self
csabella246ff3b2017-07-03 21:31:25 -040066
Aviral Srivastava000cde52021-02-01 09:38:44 -080067
68# Odd Function and Class signatures are for back-compatibility.
class Function(_Object):
    """Information about a Python function, including methods.

    Adds the ``is_async`` flag to the common _Object attributes.  When the
    parent is a Class, the function is also recorded in ``parent.methods``
    as ``name -> lineno``.
    """

    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno,
                         end_lineno=end_lineno, parent=parent)
        self.is_async = is_async
        defined_in_class = isinstance(parent, Class)
        if defined_in_class:
            parent.methods[name] = lineno
csabella246ff3b2017-07-03 21:31:25 -040077
Aviral Srivastava000cde52021-02-01 09:38:44 -080078
class Class(_Object):
    """Information about a Python class.

    Adds two attributes to the common _Object ones:
        super   -- the list passed as super_, or a new empty list when
                   super_ is None or empty;
        methods -- dict that Function fills with ``name -> lineno`` for
                   functions created with this class as parent.
    """

    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno,
                         end_lineno=end_lineno, parent=parent)
        self.super = super_ if super_ else []
        self.methods = {}
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000086
Aviral Srivastava000cde52021-02-01 09:38:44 -080087
Batuhan Taskayafa476fe2020-11-11 10:14:12 +030088# These 2 functions are used in these tests
89# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    """Return a new Function called func_name nested within ob.

    The module and file of the new object are taken from ob, which
    becomes its parent.
    """
    nested = Function(
        ob.module, func_name, ob.file, lineno,
        ob, is_async,
        end_lineno=end_lineno,
    )
    return nested
csabella246ff3b2017-07-03 21:31:25 -040094
def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    """Return a new Class called class_name nested within ob.

    The module and file of the new object are taken from ob, which
    becomes its parent; super is forwarded as the list of base classes.
    """
    nested = Class(
        ob.module, class_name, super, ob.file, lineno,
        ob,
        end_lineno=end_lineno,
    )
    return nested
99
Guido van Rossuma3b4a331999-06-10 14:39:39 +0000100
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added: the
    module is scanned in full and every entry that is not a Class
    instance is filtered out of the result.
    """
    scanned = _readmodule(module, path or [])
    return {
        name: entry
        for name, entry in scanned.items()
        if isinstance(entry, Class)
    }
Guido van Rossuma3b4a331999-06-10 14:39:39 +0000112
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    # A missing or empty path means "no extra directories".
    extra_dirs = path if path else []
    return _readmodule(module, extra_dirs)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000121
Aviral Srivastava000cde52021-02-01 09:38:44 -0800122
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the dict describing the module (shared with the module cache).
    Raise ImportError when a dotted name's parent is not a package, and
    ModuleNotFoundError when no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    fullmodule = module if inpackage is None else f"{inpackage}.{module}"

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: resolve the parent package first,
    # then look the last component up on the parent's search path.
    package, dot, submodule = module.rpartition('.')
    if dot:
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError(f'No package named {package}')
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    search_path = path if inpackage is not None else path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)

    # Cache the (still empty) tree before scanning the source, so that a
    # recursive request for this module made while processing its imports
    # is answered from the cache.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
Victor Stinner5c13aa12016-03-17 09:06:41 +0100185
csabella246ff3b2017-07-03 21:31:25 -0400186
class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that records a module's classes and functions in tree.

    Top-level definitions are stored in ``tree`` under their names; nested
    definitions are attached to their enclosing Class/Function object.
    Import statements that start in column 0 cause the imported modules
    to be scanned as well.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        # Class/Function objects enclosing the node being visited,
        # innermost last; empty while at module level.
        self.stack = []

    def visit_ClassDef(self, node):
        "Record a class definition and then visit its body."
        bases = []
        for base in node.bases:
            name = ast.unparse(base)
            if name in self.tree:
                # We know this super class.
                bases.append(self.tree[name])
                continue
            # Default: an unrecognized super class is recorded by name.
            resolved = name
            names = name.split(".")
            if len(names) > 1:
                # Super class form is module.class:
                # look in module for class.
                *_, module, class_ = names
                if module in _modules:
                    resolved = _modules[module].get(class_, name)
            # Always append, so a dotted base whose module was never
            # scanned still shows up (as a string) instead of vanishing.
            bases.append(resolved)

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        "Record a function definition and then visit its body."
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        "Record an 'async def' like a plain function, flagged as async."
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        "Scan modules named in an 'import' statement starting in column 0."
        if node.col_offset != 0:
            return

        for module in node.names:
            try:
                try:
                    # Result is discarded; the call is made so the module
                    # ends up in the cache.
                    _readmodule(module.name, self.path, self.inpackage)
                except ImportError:
                    # Retry as a top-level module with no extra path.
                    _readmodule(module.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        "Copy names brought in by a 'from ... import' starting in column 0."
        if node.col_offset != 0:
            return
        try:
            # Leading dots carry the relative-import level.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                # Star import: take every name without a leading underscore.
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value
csabella246ff3b2017-07-03 21:31:25 -0400268
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000269
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and return tree filled in with what it defines.

    A _ModuleBrowser is run over the AST of source; the dict it has
    populated is returned.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    parsed = ast.parse(source)
    browser.visit(parsed)
    return browser.tree
Guido van Rossum0a6f9542002-12-03 08:14:35 +0000274
csabella246ff3b2017-07-03 21:31:25 -0400275
def _main():
    """Print module output (default this file) for quick visual check.

    The module to scan is taken from sys.argv[1]; it may be a module name
    or the path of an existing file.  Each class and function is printed
    on its own line in line-number order, with nested definitions
    indented below their parent.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(obj):
        # Values without a line number (e.g. '__path__') sort first.
        return getattr(obj, 'lineno', 0)

    indent_level = 2
    # Work stack of (object, indent) pairs.  Items are sorted in reverse
    # so that pop() yields them in ascending line order.
    pending = [(obj, 0)
               for obj in sorted(tree.values(), key=lineno_key, reverse=True)]
    while pending:
        obj, indent = pending.pop()
        if not isinstance(obj, _Object):
            # Value is a __path__ key.
            continue

        children = sorted(obj.children.values(),
                          key=lineno_key, reverse=True)
        pending.extend((child, indent + indent_level) for child in children)

        pad = ' ' * indent
        if isinstance(obj, Class):
            print(f"{pad}class {obj.name} {obj.super} {obj.lineno}")
        elif isinstance(obj, Function):
            print(f"{pad}def {obj.name} {obj.lineno}")


if __name__ == "__main__":
    _main()