patch attached to sf item #643711:
any_missing() returns less bogus missing modules.
- I've rewritten scan_code() more or less from scratch,
factored bits and pieces out for readability.
- keep track of global assignments and failed imports per
module; use this to determine whether the Y in "from X
import Y" is a submodule or just a global name. This is not
100% doable: you can't tell which symbols are imported when
doing a star import of a non-Python module short of actually
importing it.
- added a new method to ModuleFinder: any_missing_maybe(),
which returns *two* lists, one with certain misses, one with
possible misses. The possible misses are *very* often false
alarms, so it's useful to keep this list separate.
any_misses() now simply returns the union of
any_missing_maybe().
TODO: documentation, test_modulefinder.py
diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py
index cd03db4..9d524c2 100644
--- a/Lib/modulefinder.py
+++ b/Lib/modulefinder.py
@@ -15,12 +15,11 @@
# remain compatible with Python < 2.3
READ_MODE = "r"
+LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
-IMPORT_FROM = dis.opname.index('IMPORT_FROM')
STORE_NAME = dis.opname.index('STORE_NAME')
-STORE_FAST = dis.opname.index('STORE_FAST')
STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
-STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
+STORE_OPS = [STORE_NAME, STORE_GLOBAL]
# Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there
@@ -54,6 +53,13 @@
self.__file__ = file
self.__path__ = path
self.__code__ = None
+ # The set of global names that are assigned to in the module.
+ # This includes those names imported through starimports of
+ # Python modules.
+ self.globalnames = {}
+ # The set of starimports this module did that could not be
+ # resolved, ie. a starimport from a non-Python module.
+ self.starimports = {}
def __repr__(self):
s = "Module(%s" % `self.__name__`
@@ -66,7 +72,7 @@
class ModuleFinder:
- def __init__(self, path=None, debug=0, excludes = [], replace_paths = []):
+ def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
if path is None:
path = sys.path
self.path = path
@@ -233,8 +239,6 @@
return m
if self.badmodules.has_key(fqname):
self.msgout(3, "import_module -> None")
- if parent:
- self.badmodules[fqname][parent.__name__] = None
return None
try:
fp, pathname, stuff = self.find_module(partname,
@@ -277,11 +281,39 @@
self.msgout(2, "load_module ->", m)
return m
+ def _add_badmodule(self, name, caller):
+ if name not in self.badmodules:
+ self.badmodules[name] = {}
+ self.badmodules[name][caller.__name__] = 1
+
+ def _safe_import_hook(self, name, caller, fromlist):
+ # wrapper for self.import_hook() that won't raise ImportError
+ if name in self.badmodules:
+ self._add_badmodule(name, caller)
+ return
+ try:
+ self.import_hook(name, caller)
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ self._add_badmodule(name, caller)
+ else:
+ if fromlist:
+ for sub in fromlist:
+ if sub in self.badmodules:
+ self._add_badmodule(sub, caller)
+ continue
+ try:
+ self.import_hook(name, caller, [sub])
+ except ImportError, msg:
+ self.msg(2, "ImportError:", str(msg))
+ fullname = name + "." + sub
+ self._add_badmodule(fullname, caller)
+
def scan_code(self, co, m):
code = co.co_code
n = len(code)
i = 0
- lastname = None
+ fromlist = None
while i < n:
c = code[i]
i = i+1
@@ -289,33 +321,43 @@
if op >= dis.HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
- if op == IMPORT_NAME:
- name = lastname = co.co_names[oparg]
- if not self.badmodules.has_key(lastname):
- try:
- self.import_hook(name, m)
- except ImportError, msg:
- self.msg(2, "ImportError:", str(msg))
- if not self.badmodules.has_key(name):
- self.badmodules[name] = {}
- self.badmodules[name][m.__name__] = None
- elif op == IMPORT_FROM:
+ if op == LOAD_CONST:
+ # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
+ # a tuple of "from" names, or None for a regular import.
+ # The tuple may contain "*" for "from <mod> import *"
+ fromlist = co.co_consts[oparg]
+ elif op == IMPORT_NAME:
+ assert fromlist is None or type(fromlist) is tuple
name = co.co_names[oparg]
- assert lastname is not None
- if not self.badmodules.has_key(lastname):
- try:
- self.import_hook(lastname, m, [name])
- except ImportError, msg:
- self.msg(2, "ImportError:", str(msg))
- fullname = lastname + "." + name
- if not self.badmodules.has_key(fullname):
- self.badmodules[fullname] = {}
- self.badmodules[fullname][m.__name__] = None
+ have_star = 0
+ if fromlist is not None:
+ if "*" in fromlist:
+ have_star = 1
+ fromlist = [f for f in fromlist if f != "*"]
+ self._safe_import_hook(name, m, fromlist)
+ if have_star:
+ # We've encountered an "import *". If it is a Python module,
+ # the code has already been parsed and we can suck out the
+ # global names.
+ mm = None
+ if m.__path__:
+ # At this point we don't know whether 'name' is a
+ # submodule of 'm' or a global module. Let's just try
+ # the full name first.
+ mm = self.modules.get(m.__name__ + "." + name)
+ if mm is None:
+ mm = self.modules.get(name)
+ if mm is not None:
+ m.globalnames.update(mm.globalnames)
+ m.starimports.update(mm.starimports)
+ if mm.__code__ is None:
+ m.starimports[name] = 1
+ else:
+ m.starimports[name] = 1
elif op in STORE_OPS:
- # Skip; each IMPORT_FROM is followed by a STORE_* opcode
- pass
- else:
- lastname = None
+ # keep track of all global names that are assigned to
+ name = co.co_names[oparg]
+ m.globalnames[name] = 1
for c in co.co_consts:
if isinstance(c, type(co)):
self.scan_code(c, m)
@@ -360,6 +402,9 @@
return imp.find_module(name, path)
def report(self):
+ """Print a report to stdout, listing the found modules with their
+ paths, as well as modules that are missing, or seem to be missing.
+ """
print
print " %-25s %s" % ("Name", "File")
print " %-25s %s" % ("----", "----")
@@ -367,6 +412,7 @@
keys = self.modules.keys()
keys.sort()
for key in keys:
+ continue
m = self.modules[key]
if m.__path__:
print "P",
@@ -375,33 +421,87 @@
print "%-25s" % key, m.__file__ or ""
# Print missing modules
- keys = self.badmodules.keys()
- keys.sort()
- for key in keys:
- # ... but not if they were explicitly excluded.
- if key not in self.excludes:
- mods = self.badmodules[key].keys()
+ missing, maybe = self.any_missing_maybe()
+ if missing:
+ print
+ print "Missing modules:"
+ for name in missing:
+ mods = self.badmodules[name].keys()
mods.sort()
- print "?", key, "from", ', '.join(mods)
+ print "?", name, "imported from", ', '.join(mods)
+ # Print modules that may be missing, but then again, maybe not...
+ if maybe:
+ print
+ print "Submodules thay appear to be missing, but could also be",
+ print "global names in the parent package:"
+ for name in maybe:
+ mods = self.badmodules[name].keys()
+ mods.sort()
+ print "?", name, "imported from", ', '.join(mods)
def any_missing(self):
- keys = self.badmodules.keys()
+ """Return a list of modules that appear to be missing. Use
+ any_missing_maybe() if you want to know which modules are
+ certain to be missing, and which *may* be missing.
+ """
+ missing, maybe = self.any_missing_maybe()
+ return missing + maybe
+
+ def any_missing_maybe(self):
+ """Return two lists, one with modules that are certainly missing
+ and one with modules that *may* be missing. The latter names could
+ either be submodules *or* just global names in the package.
+
+ The reason it can't always be determined is that it's impossible to
+ tell which names are imported when "from module import *" is done
+ with an extension module, short of actually importing it.
+ """
missing = []
- for key in keys:
- if key not in self.excludes:
- # Missing, and its not supposed to be
- missing.append(key)
- return missing
+ maybe = []
+ for name in self.badmodules:
+ if name in self.excludes:
+ continue
+ i = name.rfind(".")
+ if i < 0:
+ missing.append(name)
+ continue
+ subname = name[i+1:]
+ pkgname = name[:i]
+ pkg = self.modules.get(pkgname)
+ if pkg is not None:
+ if pkgname in self.badmodules[name]:
+ # The package tried to import this module itself and
+ # failed. It's definitely missing.
+ missing.append(name)
+ elif subname in pkg.globalnames:
+ # It's a global in the package: definitely not missing.
+ pass
+ elif pkg.starimports:
+ # It could be missing, but the package did an "import *"
+ # from a non-Python module, so we simply can't be sure.
+ maybe.append(name)
+ else:
+ # It's not a global in the package, the package didn't
+ # do funny star imports, it's very likely to be missing.
+ # The symbol could be inserted into the package from the
+ # outside, but since that's not good style we simply list
+ # it missing.
+ missing.append(name)
+ else:
+ missing.append(name)
+ missing.sort()
+ maybe.sort()
+ return missing, maybe
def replace_paths_in_code(self, co):
new_filename = original_filename = os.path.normpath(co.co_filename)
- for f,r in self.replace_paths:
+ for f, r in self.replace_paths:
if original_filename.startswith(f):
- new_filename = r+original_filename[len(f):]
+ new_filename = r + original_filename[len(f):]
break
if self.debug and original_filename not in self.processed_paths:
- if new_filename!=original_filename:
+ if new_filename != original_filename:
self.msgout(2, "co_filename %r changed to %r" \
% (original_filename,new_filename,))
else:
@@ -477,10 +577,11 @@
mf.load_file(arg)
mf.run_script(script)
mf.report()
+ return mf # for -i debugging
if __name__ == '__main__':
try:
- test()
+ mf = test()
except KeyboardInterrupt:
print "\n[interrupt]"