blob: 741ef4d9266fc0141c427dadf06bf1dfdb2a2558 [file] [log] [blame]
Guido van Rossum75dc4961998-03-05 03:42:00 +00001"""Find modules used by a script, using introspection."""
2
3import dis
4import imp
5import marshal
6import os
7import re
8import string
9import sys
Guido van Rossum6b767ac2001-03-20 20:43:34 +000010import new
Guido van Rossum75dc4961998-03-05 03:42:00 +000011
# Numeric opcodes the bytecode scanner compares against, looked up by name
# in the running interpreter's opcode table.
(IMPORT_NAME, IMPORT_FROM,
 STORE_NAME, STORE_FAST, STORE_GLOBAL) = map(
    dis.opname.index,
    ('IMPORT_NAME', 'IMPORT_FROM',
     'STORE_NAME', 'STORE_FAST', 'STORE_GLOBAL'))
# The store opcodes, grouped so they can be tested with a single "in".
STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
Guido van Rossum75dc4961998-03-05 03:42:00 +000018
# Modulefinder does a good job at simulating Python's import mechanism,
# but it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note this is a mapping to lists of paths.
packagePathMap = {}


# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an additional search directory for a package.

    The path is appended to the list stored under *packagename* in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
Guido van Rossum75dc4961998-03-05 03:42:00 +000032
class Module:
    """Lightweight record describing one module or package.

    Attributes are stored under the same dunder names a real module
    object uses: __name__, __file__, __path__ and __code__.
    """

    def __init__(self, name, file=None, path=None):
        """Store the name, optional file and optional package path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        # No code object is attached at construction time.
        self.__code__ = None

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting None fields."""
        shown = [self.__name__]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(optional)
        return "Module(%s)" % ", ".join([repr(item) for item in shown])
Guido van Rossum75dc4961998-03-05 03:42:00 +000049
Mark Hammond773c83b2001-09-05 23:42:36 +000050_warned = 0
51
52def _try_registry(name):
53 # Emulate the Registered Module support on Windows.
54 try:
55 import _winreg
56 RegQueryValue = _winreg.QueryValue
57 HKLM = _winreg.HKEY_LOCAL_MACHINE
58 exception = _winreg.error
59 except ImportError:
60 try:
61 import win32api
62 RegQueryValue = win32api.RegQueryValue
63 HKLM = 0x80000002 # HKEY_LOCAL_MACHINE
64 exception = win32api.error
65 except ImportError:
66 global _warned
67 if not _warned:
68 _warned = 1
69 print "Warning: Neither _winreg nor win32api is available - modules"
70 print "listed in the registry will not be found"
71 return None
72 try:
73 pathname = RegQueryValue(HKLM, \
74 r"Software\Python\PythonCore\%s\Modules\%s" % (sys.winver, name))
75 fp = open(pathname, "rb")
76 except exception:
77 return None
78 else:
79 # XXX - To do - remove the hard code of C_EXTENSION.
80 stuff = "", "rb", imp.C_EXTENSION
81 return fp, pathname, stuff
Guido van Rossum75dc4961998-03-05 03:42:00 +000082
class ModuleFinder:
    """Find the modules a script uses by scanning compiled bytecode.

    Nothing is executed: source files are compiled, and the resulting
    code objects are searched for import opcodes.

    Attributes:
        path: list of directories searched for top-level modules.
        modules: dict mapping fully qualified name -> Module.
        badmodules: dict mapping a name that could not be imported ->
            dict whose keys are the names of the importing modules.
        debug: verbosity; a message is shown when its level <= debug.
        indent: current nesting depth of the debug trace.
        excludes: list of fully qualified names that must not be found.
        replace_paths: list of (old_prefix, new_prefix) pairs applied to
            co_filename of every loaded code object.
        processed_paths: filenames already reported by
            replace_paths_in_code (used in debugging only).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Create a finder.

        *path* defaults to sys.path.  *excludes* and *replace_paths*
        default to fresh empty lists; they are created per instance so
        that mutating one finder's lists cannot leak into another.
        """
        if path is None:
            path = sys.path
        if excludes is None:
            excludes = []
        if replace_paths is None:
            replace_paths = []
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes
        self.replace_paths = replace_paths
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Write one trace line to stdout if *level* <= self.debug.

        The line consists of one " " per indent level, then *str*, then
        the repr() of every extra argument, all separated by spaces.
        """
        if level <= self.debug:
            pieces = [" "] * self.indent
            # "str" is the parameter here, so format it explicitly.
            pieces.append("%s" % (str,))
            for arg in args:
                pieces.append(repr(arg))
            sys.stdout.write(" ".join(pieces) + "\n")

    def msgin(self, *args):
        """Like msg(), but first increase the trace indentation."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first decrease the trace indentation."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the source file *pathname* as the module '__main__'."""
        self.msg(2, "run_script", pathname)
        fp = open(pathname)
        try:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
        finally:
            # Close the script even when compilation or scanning fails.
            fp.close()

    def load_file(self, pathname):
        """Load the source file *pathname* under its base name."""
        dir, name = os.path.split(pathname)
        name, ext = os.path.splitext(name)
        fp = open(pathname)
        try:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
        finally:
            fp.close()

    def import_hook(self, name, caller=None, fromlist=None):
        """Simulate ``import name`` / ``from name import fromlist``.

        *caller* is the importing Module, or None for a top-level import.
        Returns the head package when *fromlist* is empty, otherwise
        None.  Raises ImportError when a component cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist)
        parent = self.determine_parent(caller)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            # Only packages can supply submodules for a from-import.
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller):
        """Return the package relative to which *caller* imports.

        That is *caller* itself when it is a package, the package that
        contains it when its name is dotted, and None otherwise.
        """
        self.msgin(4, "determine_parent", caller)
        if not caller:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            i = pname.rfind('.')
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first component of the dotted *name*.

        The component is tried relative to *parent* first and then as a
        top-level module.  Returns ``(module, remaining_tail)``; raises
        ImportError when neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        if '.' in name:
            i = name.find('.')
            head = name[:i]
            tail = name[i+1:]
        else:
            head = name
            tail = ""
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # Relative lookup failed; retry as an absolute import.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining component of *tail* below module *q*.

        Returns the innermost module; raises ImportError on a miss.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            i = tail.find('.')
            if i < 0: i = len(tail)
            head, tail = tail[:i], tail[i+1:]
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is a submodule of *m*.

        "*" expands (once, not recursively) to every submodule found on
        disk.  Names that are already attributes of *m* are skipped.
        Raises ImportError for a name that cannot be imported.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    all = self.find_all_submodules(m)
                    if all:
                        self.ensure_fromlist(m, all, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of the .py/.pyc/.pyo files in package *m*.

        "__init__" is left out and each name is reported once.  Returns
        None when *m* has no __path__.
        """
        if not m.__path__:
            return
        modules = {}
        suffixes = [".py", ".pyc", ".pyo"]
        for dir in m.__path__:
            try:
                names = os.listdir(dir)
            except os.error:
                self.msg(2, "can't list directory", dir)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    n = len(suff)
                    if name[-n:] == suff:
                        mod = name[:-n]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return list(modules.keys())

    def import_module(self, partname, fqname, parent):
        """Find and load one module; return it, or None if not found.

        *partname* is the last component of *fqname*; *parent* is the
        containing package Module or None.  Results are cached in
        self.modules, and on success the module is also set as an
        attribute of *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            if parent:
                self.badmodules[fqname][parent.__name__] = None
            return None
        if parent and parent.__path__ is None:
            # A plain module cannot contain submodules.  Without this
            # check the search would fall back to the top-level path and
            # record an unrelated module under the dotted name.
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__,
                                                   parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp: fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register *fqname* and scan its code for further imports.

        *file_info* is the ``(suffix, mode, type)`` triple as returned by
        find_module.  Packages are delegated to load_package; source is
        compiled, compiled files are unmarshalled, and any other type is
        registered without code.  Raises ImportError for a compiled file
        with the wrong magic number.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            if fp.read(4) != imp.get_magic():
                self.msgout(2, "raise ImportError: Bad magic number", pathname)
                raise ImportError("Bad magic number in %s" % pathname)
            # Discard the 4 header bytes that follow the magic number.
            fp.read(4)
            co = marshal.load(fp)
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that module *caller* failed to import *name*."""
        if name not in self.badmodules:
            self.badmodules[name] = {}
        self.badmodules[name][caller.__name__] = None

    def scan_code(self, co, m):
        """Walk the bytecode of *co* and simulate every import in it.

        *m* is the Module the code belongs to.  Failed imports are
        recorded in self.badmodules.  Nested code objects found in
        co_consts are scanned recursively.

        The decoder expects co_code to be a string of one-byte opcodes,
        each followed by a two-byte little-endian argument when the
        opcode is >= dis.HAVE_ARGUMENT.
        """
        code = co.co_code
        n = len(code)
        i = 0
        lastname = None
        while i < n:
            c = code[i]
            i = i+1
            op = ord(c)
            if op >= dis.HAVE_ARGUMENT:
                oparg = ord(code[i]) + ord(code[i+1])*256
                i = i+2
            if op == IMPORT_NAME:
                name = lastname = co.co_names[oparg]
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(name, m)
                    except ImportError:
                        msg = sys.exc_info()[1]
                        self.msg(2, "ImportError:", "%s" % (msg,))
                        self._add_badmodule(name, m)
            elif op == IMPORT_FROM:
                name = co.co_names[oparg]
                assert lastname is not None
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(lastname, m, [name])
                    except ImportError:
                        msg = sys.exc_info()[1]
                        self.msg(2, "ImportError:", "%s" % (msg,))
                        fullname = lastname + "." + name
                        self._add_badmodule(fullname, m)
            elif op in STORE_OPS:
                # Skip; each IMPORT_FROM is followed by a STORE_* opcode
                pass
            else:
                lastname = None
        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package directory *pathname* and load __init__.

        The package's __path__ is *pathname* plus any directories
        registered for *fqname* in packagePathMap.
        """
        self.msgin(2, "load_package", fqname, pathname)
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
        finally:
            # find_module hands back an open file; do not leak it.
            if fp: fp.close()
        self.msgout(2, "load_package ->", m)
        return m

    def add_module(self, fqname):
        """Return the Module registered as *fqname*, creating it if new."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate module *name*; return ``(fp, pathname, stuff)``.

        *path* is the list of directories to search, or None for a
        top-level lookup (built-in modules, then on win32 the registry,
        then self.path).  *parent*, when given, is the containing
        package; its dotted name is used to build the fully qualified
        name that is checked against self.excludes.

        Raises ImportError when the module is excluded or not found.
        """
        if parent is not None:
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            # Plain msg(): there is no msgin() in this method to balance.
            self.msg(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))

            if sys.platform=="win32":
                result = _try_registry(name)
                if result:
                    return result

            path = self.path
        return imp.find_module(name, path)

    def report(self):
        """Print a table of found modules and a list of missing ones.

        Packages are flagged "P" and plain modules "m".  Missing modules
        are flagged "?" together with the modules that import them;
        names listed in self.excludes are not reported.
        """
        write = sys.stdout.write
        write("\n")
        write(" %-25s %s\n" % ("Name", "File"))
        write(" %-25s %s\n" % ("----", "----"))
        # Print modules found
        keys = list(self.modules.keys())
        keys.sort()
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                flag = "P"
            else:
                flag = "m"
            write("%s %-25s %s\n" % (flag, key, m.__file__ or ""))

        # Print missing modules
        keys = list(self.badmodules.keys())
        keys.sort()
        for key in keys:
            # ... but not if they were explicitly excluded.
            if key not in self.excludes:
                mods = list(self.badmodules[key].keys())
                mods.sort()
                write("? %s from %s\n" % (key, ', '.join(mods)))

    def any_missing(self):
        """Return the names of missing modules that were not excluded."""
        missing = []
        for key in self.badmodules.keys():
            if key not in self.excludes:
                # Missing, and it's not supposed to be
                missing.append(key)
        return missing

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with co_filename rewritten.

        The first ``(old, new)`` pair in self.replace_paths whose *old*
        is a prefix of the normalised filename is applied.  Nested code
        objects in co_consts are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r+original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            # Plain msg(): there is no msgin() in this method to balance.
            if new_filename != original_filename:
                self.msg(2, "co_filename %r changed to %r" \
                            % (original_filename, new_filename,))
            else:
                self.msg(2, "co_filename %r remains unchanged" \
                            % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = list(co.co_consts)
        for i in range(len(consts)):
            if isinstance(consts[i], type(co)):
                consts[i] = self.replace_paths_in_code(consts[i])

        # Carry over free and cell variable names as well; leaving them
        # out would produce a different code object for any function
        # that uses nested scopes.
        return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
                        co.co_flags, co.co_code, tuple(consts), co.co_names,
                        co.co_varnames, new_filename, co.co_name,
                        co.co_firstlineno, co.co_lnotab,
                        co.co_freevars, co.co_cellvars)
438
Guido van Rossum75dc4961998-03-05 03:42:00 +0000439
def test():
    """Command-line driver.

    Options: -d raises the debug level, -q silences it, -m treats the
    extra arguments as module names, -p adds os.pathsep-separated
    directories to the front of the search path, -x excludes a module.
    The first argument is the script to analyse (default "hello.py");
    the remaining arguments are loaded before it.  A report is printed
    at the end.
    """
    import getopt

    # Parse command line
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error:
        sys.stdout.write("%s\n" % (sys.exc_info()[1],))
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug = debug + 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath = addpath + value.split(os.pathsep)
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    if args:
        script = args[0]
    else:
        script = "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        sys.stdout.write("path:\n")
        for item in path:
            sys.stdout.write("%s %s\n" % (" ", repr(item)))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            # From here on, arguments name modules rather than files.
            domods = 1
            continue
        if not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
496
497
if __name__ == '__main__':
    # Run the command-line driver; Ctrl-C ends it with a short notice
    # instead of a traceback.
    try:
        test()
    except KeyboardInterrupt:
        sys.stdout.write("\n[interrupt]\n")