blob: e9c1140b71bc7110de65cf30489f7c54e0607b70 [file] [log] [blame]
Guido van Rossum75dc4961998-03-05 03:42:00 +00001"""Find modules used by a script, using introspection."""
2
3import dis
4import imp
5import marshal
6import os
7import re
Guido van Rossum75dc4961998-03-05 03:42:00 +00008import sys
Guido van Rossum6b767ac2001-03-20 20:43:34 +00009import new
Guido van Rossum75dc4961998-03-05 03:42:00 +000010
# Numeric opcodes that ModuleFinder.scan_code() looks for while walking
# compiled bytecode.  Each value is the position of the opcode's name in
# dis.opname.
IMPORT_NAME, IMPORT_FROM = map(dis.opname.index,
                               ('IMPORT_NAME', 'IMPORT_FROM'))
STORE_NAME, STORE_FAST, STORE_GLOBAL = map(
    dis.opname.index, ('STORE_NAME', 'STORE_FAST', 'STORE_GLOBAL'))
# The store opcodes, grouped so they can be tested for with a single "in".
STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
Guido van Rossum75dc4961998-03-05 03:42:00 +000017
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
Guido van Rossumf1b5a0e1998-05-18 20:21:56 +000022
# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search location for *packagename*.

    The path is appended to the list kept for that package in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
Guido van Rossum75dc4961998-03-05 03:42:00 +000031
class Module:
    """Lightweight record describing one module or package.

    Attributes (named after the real module attributes they stand for):
      __name__ -- the name given to the constructor
      __file__ -- the file argument, or None
      __path__ -- the path argument, or None
      __code__ -- always None after construction; filled in by other code
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None

    def __repr__(self):
        """Return "Module(name[, file][, path])", omitting None fields."""
        shown = [self.__name__]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(optional)
        return "Module(%s)" % ", ".join(map(repr, shown))
Guido van Rossum75dc4961998-03-05 03:42:00 +000048
Mark Hammond773c83b2001-09-05 23:42:36 +000049_warned = 0
50
51def _try_registry(name):
52 # Emulate the Registered Module support on Windows.
53 try:
54 import _winreg
55 RegQueryValue = _winreg.QueryValue
56 HKLM = _winreg.HKEY_LOCAL_MACHINE
57 exception = _winreg.error
58 except ImportError:
59 try:
60 import win32api
61 RegQueryValue = win32api.RegQueryValue
62 HKLM = 0x80000002 # HKEY_LOCAL_MACHINE
63 exception = win32api.error
64 except ImportError:
65 global _warned
66 if not _warned:
67 _warned = 1
68 print "Warning: Neither _winreg nor win32api is available - modules"
69 print "listed in the registry will not be found"
70 return None
71 try:
72 pathname = RegQueryValue(HKLM, \
73 r"Software\Python\PythonCore\%s\Modules\%s" % (sys.winver, name))
74 fp = open(pathname, "rb")
75 except exception:
76 return None
77 else:
78 # XXX - To do - remove the hard code of C_EXTENSION.
79 stuff = "", "rb", imp.C_EXTENSION
80 return fp, pathname, stuff
Guido van Rossum75dc4961998-03-05 03:42:00 +000081
class ModuleFinder:
    """Find the modules a script imports by scanning compiled bytecode.

    Modules that were located are kept in self.modules (fully qualified
    name -> Module).  Names that could not be imported are kept in
    self.badmodules (name -> dict whose keys are the names of the modules
    that tried to import it).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Create an empty finder.

        path          -- list of directories to search (default: sys.path)
        debug         -- verbosity threshold used by msg()
        excludes      -- names find_module() must refuse to locate
        replace_paths -- (prefix, replacement) pairs applied to the
                         co_filename of loaded code objects
        """
        if path is None:
            path = sys.path
        # None stands for "empty list" so that instances never share (and
        # mutate) one default list object.
        if excludes is None:
            excludes = []
        if replace_paths is None:
            replace_paths = []
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes
        self.replace_paths = replace_paths
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Write one debug line if level <= self.debug.

        The line is the current indentation, then str, then the repr of
        every extra argument, all separated by single spaces.
        """
        if level > self.debug:
            return
        fields = ["  "] * self.indent
        fields.append("%s" % (str,))
        for arg in args:
            fields.append(repr(arg))
        sys.stdout.write(" ".join(fields) + "\n")

    def msgin(self, *args):
        """Like msg(), but first increase the indentation by one level."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
        self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first decrease the indentation by one level."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
        self.msg(*args)

    def run_script(self, pathname):
        """Load the source file at pathname as module '__main__'."""
        self.msg(2, "run_script", pathname)
        fp = open(pathname)
        try:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
        finally:
            # Close the file even when compiling or scanning fails.
            fp.close()

    def load_file(self, pathname):
        """Load a source file as a module named after its base name."""
        dir, name = os.path.split(pathname)
        name, ext = os.path.splitext(name)
        fp = open(pathname)
        try:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
        finally:
            fp.close()

    def import_hook(self, name, caller=None, fromlist=None):
        """Simulate "import name" / "from name import fromlist".

        caller is the importing Module, or None.  Returns the head package
        when fromlist is empty and None otherwise.  ImportError propagates
        from the helpers when a component cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist)
        parent = self.determine_parent(caller)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller):
        """Return the package relative imports in caller start from.

        That is caller itself when it has a __path__, the module registered
        under the dotted prefix of caller's name when there is one, and
        None otherwise (including when caller is None).
        """
        self.msgin(4, "determine_parent", caller)
        if not caller:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            pname = pname[:pname.rfind('.')]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first component of the dotted name.

        The component is tried relative to parent first (when there is
        one) and then as a top-level name.  Returns (module, tail) where
        tail is the rest of name after the first dot, or "".  Raises
        ImportError when neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        dot = name.find('.')
        if dot >= 0:
            head, tail = name[:dot], name[dot+1:]
        else:
            head, tail = name, ""
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # The relative attempt failed; retry as an absolute import.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining dotted component of tail below q.

        Returns the innermost module; raises ImportError as soon as one
        component cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            i = tail.find('.')
            if i < 0:
                i = len(tail)
            head, tail = tail[:i], tail[i+1:]
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in fromlist that m does not already have.

        A "*" entry is expanded once (not when recursive is true) to all
        submodules found by find_all_submodules().  Raises ImportError for
        a name that cannot be imported as a submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    all = self.find_all_submodules(m)
                    if all:
                        self.ensure_fromlist(m, all, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """List the module names present in the directories of m.__path__.

        A name is every directory entry ending in .py, .pyc or .pyo with
        the suffix removed, except "__init__".  Returns None when m has no
        __path__.
        """
        if not m.__path__:
            return
        modules = {}
        suffixes = [".py", ".pyc", ".pyo"]
        for dir in m.__path__:
            try:
                names = os.listdir(dir)
            except os.error:
                self.msg(2, "can't list directory", dir)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    if name.endswith(suff):
                        mod = name[:-len(suff)]
                        break
                if mod and mod != "__init__":
                    # A dict is used so each name is reported only once.
                    modules[mod] = mod
        return list(modules.keys())

    def import_module(self, partname, fqname, parent):
        """Return the module for fqname, loading it if necessary.

        partname is the last component of fqname and parent the containing
        package (or None).  Returns None when the module is already known
        to be missing or cannot be found.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            if parent:
                self.badmodules[fqname][parent.__name__] = None
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register fqname and scan its code for further imports.

        file_info is the (suffix, mode, type) triple that find_module()
        returns as the third element of its result.  Packages are handed to
        load_package(); source is compiled; compiled files are unmarshalled
        after their magic number is checked; any other type gets a Module
        without code.  Raises ImportError on a bad magic number.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            if fp.read(4) != imp.get_magic():
                self.msgout(2, "raise ImportError: Bad magic number", pathname)
                raise ImportError("Bad magic number in %s" % pathname)
            # Skip the four bytes between the magic number and the code.
            fp.read(4)
            co = marshal.load(fp)
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        # Record that the module named caller.__name__ failed to import name.
        self.badmodules.setdefault(name, {})[caller.__name__] = None

    def scan_code(self, co, m):
        """Walk the bytecode of co and simulate every import it performs.

        m is the Module the code belongs to.  Failed imports are recorded
        in self.badmodules; nested code objects found in co.co_consts are
        scanned recursively.
        """
        code = co.co_code
        n = len(code)
        i = 0
        lastname = None     # name given to the most recent IMPORT_NAME
        while i < n:
            op = ord(code[i])
            i = i + 1
            if op >= dis.HAVE_ARGUMENT:
                # Two-byte argument, low byte first.
                oparg = ord(code[i]) + ord(code[i+1])*256
                i = i + 2
            if op == IMPORT_NAME:
                name = lastname = co.co_names[oparg]
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(name, m)
                    except ImportError:
                        self.msg(2, "ImportError:", str(sys.exc_info()[1]))
                        self._add_badmodule(name, m)
            elif op == IMPORT_FROM:
                name = co.co_names[oparg]
                assert lastname is not None
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(lastname, m, [name])
                    except ImportError:
                        self.msg(2, "ImportError:", str(sys.exc_info()[1]))
                        self._add_badmodule(lastname + "." + name, m)
            elif op in STORE_OPS:
                # Skip; each IMPORT_FROM is followed by a STORE_* opcode
                pass
            else:
                lastname = None
        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package fqname located at directory pathname.

        Its __path__ is the directory plus any paths registered for it in
        packagePathMap; the package's __init__ module is then found on that
        path and loaded under the package name.
        """
        self.msgin(2, "load_package", fqname, pathname)
        m = self.add_module(fqname)
        m.__file__ = pathname
        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = [pathname] + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        self.load_module(fqname, fp, buf, stuff)
        self.msgout(2, "load_package ->", m)
        return m

    def add_module(self, fqname):
        """Return the Module registered as fqname, creating it if needed."""
        try:
            return self.modules[fqname]
        except KeyError:
            m = self.modules[fqname] = Module(fqname)
            return m

    def find_module(self, name, path):
        """Locate module name and return imp.find_module()-style data.

        With path None, built-in modules are answered directly, the Windows
        registry is consulted on win32, and self.path is searched.  Raises
        ImportError for excluded names and whenever imp.find_module() does.
        """
        # NOTE(review): callers in this class pass a package's __path__
        # (directory names) as path, so the dotted name built here will
        # presumably not match a "pkg.sub" style entry in self.excludes --
        # confirm whether submodule excludes are meant to work.
        if path:
            fullname = '.'.join(path)+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            # Plain msg(): there is no matching msgin() in this method, so
            # msgout() would leave the debug indentation unbalanced.
            self.msg(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))

            if sys.platform=="win32":
                result = _try_registry(name)
                if result:
                    return result

            path = self.path
        return imp.find_module(name, path)

    def report(self):
        """Print a table of the modules found, then the missing ones.

        Packages are flagged "P" and plain modules "m".  Missing modules
        are flagged "?" together with the modules that import them; names
        listed in self.excludes are left out.
        """
        write = sys.stdout.write
        write("\n")
        write("  %-25s %s\n" % ("Name", "File"))
        write("  %-25s %s\n" % ("----", "----"))
        # Print modules found
        keys = list(self.modules.keys())
        keys.sort()
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                flag = "P"
            else:
                flag = "m"
            write("%s %-25s %s\n" % (flag, key, m.__file__ or ""))

        # Print missing modules
        keys = list(self.badmodules.keys())
        keys.sort()
        for key in keys:
            # ... but not if they were explicitly excluded.
            if key not in self.excludes:
                mods = list(self.badmodules[key].keys())
                mods.sort()
                write("? %s from %s\n" % (key, ', '.join(mods)))

    def any_missing(self):
        """Return the names of missing modules that were not excluded."""
        return [key for key in self.badmodules.keys()
                if key not in self.excludes]

    def replace_paths_in_code(self, co):
        """Return a copy of co with co_filename rewritten.

        The first (prefix, replacement) pair in self.replace_paths whose
        prefix starts the normalized filename is applied.  Code objects
        nested in co.co_consts are rewritten the same way.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            # Plain msg(): nothing here is paired with a msgin(), so the
            # indentation must not be touched.
            if new_filename != original_filename:
                self.msg(2, "co_filename %r changed to %r"
                            % (original_filename, new_filename,))
            else:
                self.msg(2, "co_filename %r remains unchanged"
                            % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = []
        for const in co.co_consts:
            if isinstance(const, type(co)):
                const = self.replace_paths_in_code(const)
            consts.append(const)

        return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
                        co.co_flags, co.co_code, tuple(consts), co.co_names,
                        co.co_varnames, new_filename, co.co_name,
                        co.co_firstlineno, co.co_lnotab)
436 co.co_firstlineno, co.co_lnotab)
437
Guido van Rossum75dc4961998-03-05 03:42:00 +0000438
def test():
    """Command-line driver: analyse a script and print a report.

    Options read from sys.argv:
      -d        increase the debug level by one (it starts at 1)
      -m        treat the extra arguments as module names, not files
      -p PATHS  prepend the os.pathsep-separated PATHS to the search path
      -q        set the debug level to 0
      -x NAME   exclude module NAME

    The first positional argument is the script (default "hello.py").  Each
    further argument is loaded as a file, or, after -m, imported as a
    module ("pkg.*" imports every submodule of pkg).
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error:
        sys.stdout.write("%s\n" % (sys.exc_info()[1],))
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug = debug + 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath = addpath + value.split(os.pathsep)
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    if args:
        script = args[0]
    else:
        script = "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        sys.stdout.write("path:\n")
        for item in path:
            sys.stdout.write("   %s\n" % repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
495
496
if __name__ == '__main__':
    # Run the command-line driver; Ctrl-C ends the run with a short notice
    # instead of a traceback.
    try:
        test()
    except KeyboardInterrupt:
        sys.stdout.write("\n[interrupt]\n")