bpo-6691: Pyclbr now reports nested classes and functions. (#2503)

 Original patch by Guilherme Polo.  Revisions by Cheryl Sabella.
diff --git a/Lib/pyclbr.py b/Lib/pyclbr.py
index d7dba97..2c798df 100644
--- a/Lib/pyclbr.py
+++ b/Lib/pyclbr.py
@@ -1,42 +1,41 @@
-"""Parse a Python module and describe its classes and methods.
+"""Parse a Python module and describe its classes and functions.
 
 Parse enough of a Python file to recognize imports and class and
-method definitions, and to find out the superclasses of a class.
+function definitions, and to find out the superclasses of a class.
 
 The interface consists of a single function:
-        readmodule_ex(module [, path])
+    readmodule_ex(module, path=None)
 where module is the name of a Python module, and path is an optional
 list of directories where the module is to be searched.  If present,
-path is prepended to the system search path sys.path.  The return
-value is a dictionary.  The keys of the dictionary are the names of
-the classes defined in the module (including classes that are defined
-via the from XXX import YYY construct).  The values are class
-instances of the class Class defined here.  One special key/value pair
-is present for packages: the key '__path__' has a list as its value
-which contains the package search path.
+path is prepended to the system search path sys.path.  The return value
+is a dictionary.  The keys of the dictionary are the names of the
+classes and functions defined in the module (including classes that are
+defined via the from XXX import YYY construct).  The values are
+instances of classes Class and Function.  One special key/value pair is
+present for packages: the key '__path__' has a list as its value which
+contains the package search path.
 
-A class is described by the class Class in this module.  Instances
-of this class have the following instance variables:
-        module -- the module name
-        name -- the name of the class
-        super -- a list of super classes (Class instances)
-        methods -- a dictionary of methods
-        file -- the file in which the class was defined
-        lineno -- the line in the file on which the class statement occurred
-The dictionary of methods uses the method names as keys and the line
-numbers on which the method was defined as values.
+Classes and Functions have a common superclass: _Object.  Every instance
+has the following attributes:
+    module  -- name of the module;
+    name    -- name of the object;
+    file    -- file in which the object is defined;
+    lineno  -- line in the file where the object's definition starts;
+    parent  -- parent of this object, if any;
+    children -- nested objects contained in this object.
+The 'children' attribute is a dictionary mapping names to objects.
+
+Instances of Function describe functions with the attributes from _Object.
+
+Instances of Class describe classes with the attributes from _Object,
+plus the following:
+    super   -- list of super classes (Class instances if possible);
+    methods -- mapping of method names to beginning line numbers.
 If the name of a super class is not recognized, the corresponding
 entry in the list of super classes is not a class instance but a
 string giving the name of the super class.  Since import statements
 are recognized and imported modules are scanned as well, this
 shouldn't happen often.
-
-A function is described by the class Function in this module.
-Instances of this class have the following instance variables:
-        module -- the module name
-        name -- the name of the class
-        file -- the file in which the class was defined
-        lineno -- the line in the file on which the class statement occurred
 """
 
 import io
@@ -47,37 +46,59 @@
 
 __all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
 
-_modules = {}                           # cache of modules we've seen
+_modules = {}  # Initialize cache of modules we've seen.
 
-# each Python class is represented by an instance of this class
-class Class:
-    '''Class to represent a Python class.'''
-    def __init__(self, module, name, super, file, lineno):
+
+class _Object:
+    "Information about a Python class or function."
+    def __init__(self, module, name, file, lineno, parent):
         self.module = module
         self.name = name
-        if super is None:
-            super = []
-        self.super = super
-        self.methods = {}
         self.file = file
         self.lineno = lineno
+        self.parent = parent
+        self.children = {}
+
+    def _addchild(self, name, obj):
+        self.children[name] = obj
+
+
+class Function(_Object):
+    "Information about a Python function, including methods."
+    def __init__(self, module, name, file, lineno, parent=None):
+        _Object.__init__(self, module, name, file, lineno, parent)
+
+
+class Class(_Object):
+    "Information about a Python class."
+    def __init__(self, module, name, super, file, lineno, parent=None):
+        _Object.__init__(self, module, name, file, lineno, parent)
+        self.super = [] if super is None else super
+        self.methods = {}
 
     def _addmethod(self, name, lineno):
         self.methods[name] = lineno
 
-class Function:
-    '''Class to represent a top-level Python function'''
-    def __init__(self, module, name, file, lineno):
-        self.module = module
-        self.name = name
-        self.file = file
-        self.lineno = lineno
+
+def _nest_function(ob, func_name, lineno):
+    "Return a Function after nesting within ob."
+    newfunc = Function(ob.module, func_name, ob.file, lineno, ob)
+    ob._addchild(func_name, newfunc)
+    if isinstance(ob, Class):
+        ob._addmethod(func_name, lineno)
+    return newfunc
+
+def _nest_class(ob, class_name, lineno, super=None):
+    "Return a Class after nesting within ob."
+    newclass = Class(ob.module, class_name, super, ob.file, lineno, ob)
+    ob._addchild(class_name, newclass)
+    return newclass
 
 def readmodule(module, path=None):
-    '''Backwards compatible interface.
+    """Return Class objects for the top-level classes in module.
 
-    Call readmodule_ex() and then only keep Class objects from the
-    resulting dictionary.'''
+    This is the original interface, before Functions were added.
+    """
 
     res = {}
     for key, value in _readmodule(module, path or []).items():
@@ -86,41 +107,41 @@
     return res
 
 def readmodule_ex(module, path=None):
-    '''Read a module file and return a dictionary of classes.
+    """Return a dictionary with all functions and classes in module.
 
-    Search for MODULE in PATH and sys.path, read and parse the
-    module and return a dictionary with one entry for each class
-    found in the module.
-    '''
+    Search for module in path + sys.path.
+    If possible, include imported superclasses.
+    Do this by reading source, without importing (and executing) it.
+    """
     return _readmodule(module, path or [])
 
 def _readmodule(module, path, inpackage=None):
-    '''Do the hard work for readmodule[_ex].
+    """Do the hard work for readmodule[_ex].
 
-    If INPACKAGE is given, it must be the dotted name of the package in
+    If inpackage is given, it must be the dotted name of the package in
     which we are searching for a submodule, and then PATH must be the
     package search path; otherwise, we are searching for a top-level
-    module, and PATH is combined with sys.path.
-    '''
-    # Compute the full module name (prepending inpackage if set)
+    module, and path is combined with sys.path.
+    """
+    # Compute the full module name (prepending inpackage if set).
     if inpackage is not None:
         fullmodule = "%s.%s" % (inpackage, module)
     else:
         fullmodule = module
 
-    # Check in the cache
+    # Check in the cache.
     if fullmodule in _modules:
         return _modules[fullmodule]
 
-    # Initialize the dict for this module's contents
-    dict = {}
+    # Initialize the dict for this module's contents.
+    tree = {}
 
-    # Check if it is a built-in module; we don't do much for these
+    # Check if it is a built-in module; we don't do much for these.
     if module in sys.builtin_module_names and inpackage is None:
-        _modules[module] = dict
-        return dict
+        _modules[module] = tree
+        return tree
 
-    # Check for a dotted module name
+    # Check for a dotted module name.
     i = module.rfind('.')
     if i >= 0:
         package = module[:i]
@@ -132,88 +153,97 @@
             raise ImportError('No package named {}'.format(package))
         return _readmodule(submodule, parent['__path__'], package)
 
-    # Search the path for the module
+    # Search the path for the module.
     f = None
     if inpackage is not None:
         search_path = path
     else:
         search_path = path + sys.path
-    # XXX This will change once issue19944 lands.
     spec = importlib.util._find_spec_from_path(fullmodule, search_path)
-    _modules[fullmodule] = dict
-    # is module a package?
+    _modules[fullmodule] = tree
+    # Is module a package?
     if spec.submodule_search_locations is not None:
-        dict['__path__'] = spec.submodule_search_locations
+        tree['__path__'] = spec.submodule_search_locations
     try:
         source = spec.loader.get_source(fullmodule)
         if source is None:
-            return dict
+            return tree
     except (AttributeError, ImportError):
-        # not Python source, can't do anything with this module
-        return dict
+        # If module is not Python source, we cannot do anything.
+        return tree
 
     fname = spec.loader.get_filename(fullmodule)
+    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
 
+
+def _create_tree(fullmodule, path, fname, source, tree, inpackage):
+    """Return the tree for a particular module.
+
+    fullmodule (full module name), inpackage+module, becomes o.module.
+    path is passed to recursive calls of _readmodule.
+    fname becomes o.file.
+    source is tokenized.  Imports cause recursive calls to _readmodule.
+    tree is {} or {'__path__': <submodule search locations>}.
+    inpackage, None or string, is passed to recursive calls of _readmodule.
+
+    The effect of recursive calls is mutation of global _modules.
+    """
     f = io.StringIO(source)
 
-    stack = [] # stack of (class, indent) pairs
+    stack = [] # Initialize stack of (object, indent) pairs.
 
     g = tokenize.generate_tokens(f.readline)
     try:
         for tokentype, token, start, _end, _line in g:
             if tokentype == DEDENT:
                 lineno, thisindent = start
-                # close nested classes and defs
+                # Close previous nested classes and defs.
                 while stack and stack[-1][1] >= thisindent:
                     del stack[-1]
             elif token == 'def':
                 lineno, thisindent = start
-                # close previous nested classes and defs
+                # Close previous nested classes and defs.
                 while stack and stack[-1][1] >= thisindent:
                     del stack[-1]
-                tokentype, meth_name, start = next(g)[0:3]
+                tokentype, func_name, start = next(g)[0:3]
                 if tokentype != NAME:
-                    continue # Syntax error
+                    continue  # Skip def with syntax error.
+                cur_func = None
                 if stack:
-                    cur_class = stack[-1][0]
-                    if isinstance(cur_class, Class):
-                        # it's a method
-                        cur_class._addmethod(meth_name, lineno)
-                    # else it's a nested def
+                    cur_obj = stack[-1][0]
+                    cur_func = _nest_function(cur_obj, func_name, lineno)
                 else:
-                    # it's a function
-                    dict[meth_name] = Function(fullmodule, meth_name,
-                                               fname, lineno)
-                stack.append((None, thisindent)) # Marker for nested fns
+                    # It is just a function.
+                    cur_func = Function(fullmodule, func_name, fname, lineno)
+                    tree[func_name] = cur_func
+                stack.append((cur_func, thisindent))
             elif token == 'class':
                 lineno, thisindent = start
-                # close previous nested classes and defs
+                # Close previous nested classes and defs.
                 while stack and stack[-1][1] >= thisindent:
                     del stack[-1]
                 tokentype, class_name, start = next(g)[0:3]
                 if tokentype != NAME:
-                    continue # Syntax error
-                # parse what follows the class name
+                    continue # Skip class with syntax error.
+                # Parse what follows the class name.
                 tokentype, token, start = next(g)[0:3]
                 inherit = None
                 if token == '(':
-                    names = [] # List of superclasses
-                    # there's a list of superclasses
+                    names = [] # Initialize list of superclasses.
                     level = 1
-                    super = [] # Tokens making up current superclass
+                    super = [] # Tokens making up current superclass.
                     while True:
                         tokentype, token, start = next(g)[0:3]
                         if token in (')', ',') and level == 1:
                             n = "".join(super)
-                            if n in dict:
-                                # we know this super class
-                                n = dict[n]
+                            if n in tree:
+                                # We know this super class.
+                                n = tree[n]
                             else:
                                 c = n.split('.')
                                 if len(c) > 1:
-                                    # super class is of the form
-                                    # module.class: look in module for
-                                    # class
+                                    # Super class form is module.class:
+                                    # look in module for class.
                                     m = c[-2]
                                     c = c[-1]
                                     if m in _modules:
@@ -230,21 +260,25 @@
                                 break
                         elif token == ',' and level == 1:
                             pass
-                        # only use NAME and OP (== dot) tokens for type name
+                        # Only use NAME and OP (== dot) tokens for type name.
                         elif tokentype in (NAME, OP) and level == 1:
                             super.append(token)
-                        # expressions in the base list are not supported
+                        # Expressions in the base list are not supported.
                     inherit = names
-                cur_class = Class(fullmodule, class_name, inherit,
-                                  fname, lineno)
-                if not stack:
-                    dict[class_name] = cur_class
+                if stack:
+                    cur_obj = stack[-1][0]
+                    cur_class = _nest_class(
+                            cur_obj, class_name, lineno, inherit)
+                else:
+                    cur_class = Class(fullmodule, class_name, inherit,
+                                      fname, lineno)
+                    tree[class_name] = cur_class
                 stack.append((cur_class, thisindent))
             elif token == 'import' and start[1] == 0:
                 modules = _getnamelist(g)
                 for mod, _mod2 in modules:
                     try:
-                        # Recursively read the imported module
+                        # Recursively read the imported module.
                         if inpackage is None:
                             _readmodule(mod, path)
                         else:
@@ -262,32 +296,34 @@
                     continue
                 names = _getnamelist(g)
                 try:
-                    # Recursively read the imported module
+                    # Recursively read the imported module.
                     d = _readmodule(mod, path, inpackage)
                 except:
                     # If we can't find or parse the imported module,
                     # too bad -- don't die here.
                     continue
-                # add any classes that were defined in the imported module
-                # to our name space if they were mentioned in the list
+                # Add any classes that were defined in the imported module
+                # to our name space if they were mentioned in the list.
                 for n, n2 in names:
                     if n in d:
-                        dict[n2 or n] = d[n]
+                        tree[n2 or n] = d[n]
                     elif n == '*':
-                        # don't add names that start with _
+                        # Don't add names that start with _.
                         for n in d:
                             if n[0] != '_':
-                                dict[n] = d[n]
+                                tree[n] = d[n]
     except StopIteration:
         pass
 
     f.close()
-    return dict
+    return tree
+
 
 def _getnamelist(g):
-    # Helper to get a comma-separated list of dotted names plus 'as'
-    # clauses.  Return a list of pairs (name, name2) where name2 is
-    # the 'as' name, or None if there is no 'as' clause.
+    """Return list of (dotted-name, as-name or None) tuples for token source g.
+
+    An as-name is the name that follows 'as' in an as clause.
+    """
     names = []
     while True:
         name, token = _getname(g)
@@ -304,10 +340,9 @@
             break
     return names
 
+
 def _getname(g):
-    # Helper to get a dotted name, return a pair (name, token) where
-    # name is the dotted name, or None if there was no dotted name,
-    # and token is the next input token.
+    "Return (dotted-name or None, next-token) tuple for token source g."
     parts = []
     tokentype, token = next(g)[0:2]
     if tokentype != NAME and token != '*':
@@ -323,11 +358,14 @@
         parts.append(token)
     return (".".join(parts), token)
 
+
 def _main():
-    # Main program for testing.
+    "Print module output (default this file) for quick visual check."
     import os
-    from operator import itemgetter
-    mod = sys.argv[1]
+    try:
+        mod = sys.argv[1]
+    except:
+        mod = __file__
     if os.path.exists(mod):
         path = [os.path.dirname(mod)]
         mod = os.path.basename(mod)
@@ -335,18 +373,29 @@
             mod = mod[:-3]
     else:
         path = []
-    dict = readmodule_ex(mod, path)
-    objs = list(dict.values())
-    objs.sort(key=lambda a: getattr(a, 'lineno', 0))
-    for obj in objs:
+    tree = readmodule_ex(mod, path)
+    lineno_key = lambda a: getattr(a, 'lineno', 0)
+    objs = sorted(tree.values(), key=lineno_key, reverse=True)
+    indent_level = 2
+    while objs:
+        obj = objs.pop()
+        if isinstance(obj, list):
+            # Value is the list stored under the '__path__' key.
+            continue
+        if not hasattr(obj, 'indent'):
+            obj.indent = 0
+
+        if isinstance(obj, _Object):
+            new_objs = sorted(obj.children.values(),
+                              key=lineno_key, reverse=True)
+            for ob in new_objs:
+                ob.indent = obj.indent + indent_level
+            objs.extend(new_objs)
         if isinstance(obj, Class):
-            print("class", obj.name, obj.super, obj.lineno)
-            methods = sorted(obj.methods.items(), key=itemgetter(1))
-            for name, lineno in methods:
-                if name != "__path__":
-                    print("  def", name, lineno)
+            print("{}class {} {} {}"
+                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
         elif isinstance(obj, Function):
-            print("def", obj.name, obj.lineno)
+            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
 
 if __name__ == "__main__":
     _main()