blob: cd03db4d80c61b39a9380cd8341d6f48c61b5d9d [file] [log] [blame]
Guido van Rossum75dc4961998-03-05 03:42:00 +00001"""Find modules used by a script, using introspection."""
2
Thomas Heller919000e2002-11-25 20:21:59 +00003# This module should be kept compatible with Python 1.5.2, see PEP 291.
4
Guido van Rossum75dc4961998-03-05 03:42:00 +00005import dis
6import imp
7import marshal
8import os
Guido van Rossum75dc4961998-03-05 03:42:00 +00009import sys
Guido van Rossum6b767ac2001-03-20 20:43:34 +000010import new
Guido van Rossum75dc4961998-03-05 03:42:00 +000011
# Default to plain text mode (compatible with Python < 2.3) and switch to
# universal line endings only when the interpreter's file objects
# advertise a "newlines" attribute.
READ_MODE = "r"
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"

# Numeric opcodes that scan_code() looks for in compiled bytecode.
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
IMPORT_FROM = dis.opname.index('IMPORT_FROM')
STORE_NAME = dis.opname.index('STORE_NAME')
STORE_FAST = dis.opname.index('STORE_FAST')
STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
# Every opcode that stores a name; scan_code() skips over these.
STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
Guido van Rossum75dc4961998-03-05 03:42:00 +000024
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and it will be honored.
Guido van Rossumf1b5a0e1998-05-18 20:21:56 +000029
# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register *path* as an additional directory for package *packagename*.

    The path is appended to the list kept in packagePathMap under the
    package name; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
Guido van Rossum75dc4961998-03-05 03:42:00 +000038
# Maps the name a package is found under to the name it should be
# recorded as.
replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around the
# way the _xmlplus package injects itself under the name "xml" into
# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
# before running ModuleFinder.

def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be treated as *newname*."""
    replacePackageMap.update({oldname: newname})
48
49
class Module:
    """Lightweight record describing one module found by ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        """Store the module name plus optional file and package path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        # Filled in later with the module's code object, when there is one.
        self.__code__ = None

    def __repr__(self):
        """Return "Module(name[, file[, path]])", omitting None fields."""
        fields = [repr(self.__name__)]
        if self.__file__ is not None:
            fields.append(repr(self.__file__))
        if self.__path__ is not None:
            fields.append(repr(self.__path__))
        return "Module(%s)" % ", ".join(fields)
Guido van Rossum75dc4961998-03-05 03:42:00 +000066
Guido van Rossum75dc4961998-03-05 03:42:00 +000067class ModuleFinder:
68
Guido van Rossum6b767ac2001-03-20 20:43:34 +000069 def __init__(self, path=None, debug=0, excludes = [], replace_paths = []):
Guido van Rossum912a14c1998-03-05 04:56:37 +000070 if path is None:
71 path = sys.path
72 self.path = path
73 self.modules = {}
74 self.badmodules = {}
75 self.debug = debug
76 self.indent = 0
Guido van Rossum78fc3631998-03-20 17:37:24 +000077 self.excludes = excludes
Guido van Rossum6b767ac2001-03-20 20:43:34 +000078 self.replace_paths = replace_paths
79 self.processed_paths = [] # Used in debugging only
Guido van Rossum75dc4961998-03-05 03:42:00 +000080
81 def msg(self, level, str, *args):
Guido van Rossum912a14c1998-03-05 04:56:37 +000082 if level <= self.debug:
83 for i in range(self.indent):
84 print " ",
85 print str,
86 for arg in args:
87 print repr(arg),
88 print
Guido van Rossum75dc4961998-03-05 03:42:00 +000089
90 def msgin(self, *args):
Guido van Rossum912a14c1998-03-05 04:56:37 +000091 level = args[0]
92 if level <= self.debug:
93 self.indent = self.indent + 1
94 apply(self.msg, args)
Guido van Rossum75dc4961998-03-05 03:42:00 +000095
96 def msgout(self, *args):
Guido van Rossum912a14c1998-03-05 04:56:37 +000097 level = args[0]
98 if level <= self.debug:
99 self.indent = self.indent - 1
100 apply(self.msg, args)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000101
102 def run_script(self, pathname):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000103 self.msg(2, "run_script", pathname)
Just van Rossum5d0bd1e2002-11-26 09:53:16 +0000104 fp = open(pathname, READ_MODE)
Guido van Rossum912a14c1998-03-05 04:56:37 +0000105 stuff = ("", "r", imp.PY_SOURCE)
106 self.load_module('__main__', fp, pathname, stuff)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000107
108 def load_file(self, pathname):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000109 dir, name = os.path.split(pathname)
110 name, ext = os.path.splitext(name)
Just van Rossum5d0bd1e2002-11-26 09:53:16 +0000111 fp = open(pathname, READ_MODE)
Guido van Rossum912a14c1998-03-05 04:56:37 +0000112 stuff = (ext, "r", imp.PY_SOURCE)
113 self.load_module(name, fp, pathname, stuff)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000114
115 def import_hook(self, name, caller=None, fromlist=None):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000116 self.msg(3, "import_hook", name, caller, fromlist)
117 parent = self.determine_parent(caller)
118 q, tail = self.find_head_package(parent, name)
119 m = self.load_tail(q, tail)
120 if not fromlist:
121 return q
122 if m.__path__:
123 self.ensure_fromlist(m, fromlist)
Thomas Heller318b7b92002-11-26 08:06:50 +0000124 return None
Guido van Rossum75dc4961998-03-05 03:42:00 +0000125
126 def determine_parent(self, caller):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000127 self.msgin(4, "determine_parent", caller)
128 if not caller:
129 self.msgout(4, "determine_parent -> None")
130 return None
131 pname = caller.__name__
132 if caller.__path__:
133 parent = self.modules[pname]
134 assert caller is parent
135 self.msgout(4, "determine_parent ->", parent)
136 return parent
137 if '.' in pname:
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000138 i = pname.rfind('.')
Guido van Rossum912a14c1998-03-05 04:56:37 +0000139 pname = pname[:i]
140 parent = self.modules[pname]
141 assert parent.__name__ == pname
142 self.msgout(4, "determine_parent ->", parent)
143 return parent
144 self.msgout(4, "determine_parent -> None")
145 return None
Guido van Rossum75dc4961998-03-05 03:42:00 +0000146
147 def find_head_package(self, parent, name):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000148 self.msgin(4, "find_head_package", parent, name)
149 if '.' in name:
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000150 i = name.find('.')
Guido van Rossum912a14c1998-03-05 04:56:37 +0000151 head = name[:i]
152 tail = name[i+1:]
153 else:
154 head = name
155 tail = ""
156 if parent:
157 qname = "%s.%s" % (parent.__name__, head)
158 else:
159 qname = head
160 q = self.import_module(head, qname, parent)
161 if q:
162 self.msgout(4, "find_head_package ->", (q, tail))
163 return q, tail
164 if parent:
165 qname = head
166 parent = None
167 q = self.import_module(head, qname, parent)
168 if q:
169 self.msgout(4, "find_head_package ->", (q, tail))
170 return q, tail
171 self.msgout(4, "raise ImportError: No module named", qname)
172 raise ImportError, "No module named " + qname
Guido van Rossum75dc4961998-03-05 03:42:00 +0000173
174 def load_tail(self, q, tail):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000175 self.msgin(4, "load_tail", q, tail)
176 m = q
177 while tail:
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000178 i = tail.find('.')
Guido van Rossum912a14c1998-03-05 04:56:37 +0000179 if i < 0: i = len(tail)
180 head, tail = tail[:i], tail[i+1:]
181 mname = "%s.%s" % (m.__name__, head)
182 m = self.import_module(head, mname, m)
183 if not m:
184 self.msgout(4, "raise ImportError: No module named", mname)
185 raise ImportError, "No module named " + mname
186 self.msgout(4, "load_tail ->", m)
187 return m
Guido van Rossum75dc4961998-03-05 03:42:00 +0000188
189 def ensure_fromlist(self, m, fromlist, recursive=0):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000190 self.msg(4, "ensure_fromlist", m, fromlist, recursive)
191 for sub in fromlist:
192 if sub == "*":
193 if not recursive:
194 all = self.find_all_submodules(m)
195 if all:
196 self.ensure_fromlist(m, all, 1)
197 elif not hasattr(m, sub):
198 subname = "%s.%s" % (m.__name__, sub)
199 submod = self.import_module(sub, subname, m)
200 if not submod:
201 raise ImportError, "No module named " + subname
Guido van Rossum75dc4961998-03-05 03:42:00 +0000202
203 def find_all_submodules(self, m):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000204 if not m.__path__:
205 return
206 modules = {}
207 suffixes = [".py", ".pyc", ".pyo"]
208 for dir in m.__path__:
209 try:
210 names = os.listdir(dir)
211 except os.error:
212 self.msg(2, "can't list directory", dir)
213 continue
214 for name in names:
215 mod = None
216 for suff in suffixes:
217 n = len(suff)
218 if name[-n:] == suff:
219 mod = name[:-n]
220 break
221 if mod and mod != "__init__":
222 modules[mod] = mod
223 return modules.keys()
Guido van Rossum75dc4961998-03-05 03:42:00 +0000224
225 def import_module(self, partname, fqname, parent):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000226 self.msgin(3, "import_module", partname, fqname, parent)
227 try:
228 m = self.modules[fqname]
229 except KeyError:
230 pass
231 else:
232 self.msgout(3, "import_module ->", m)
233 return m
234 if self.badmodules.has_key(fqname):
235 self.msgout(3, "import_module -> None")
Guido van Rossum8b4b46e1999-11-02 15:46:44 +0000236 if parent:
237 self.badmodules[fqname][parent.__name__] = None
Guido van Rossum912a14c1998-03-05 04:56:37 +0000238 return None
239 try:
240 fp, pathname, stuff = self.find_module(partname,
241 parent and parent.__path__)
242 except ImportError:
243 self.msgout(3, "import_module ->", None)
244 return None
245 try:
246 m = self.load_module(fqname, fp, pathname, stuff)
247 finally:
248 if fp: fp.close()
249 if parent:
250 setattr(parent, partname, m)
251 self.msgout(3, "import_module ->", m)
252 return m
Guido van Rossum75dc4961998-03-05 03:42:00 +0000253
254 def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000255 self.msgin(2, "load_module", fqname, fp and "fp", pathname)
256 if type == imp.PKG_DIRECTORY:
257 m = self.load_package(fqname, pathname)
258 self.msgout(2, "load_module ->", m)
259 return m
260 if type == imp.PY_SOURCE:
Guido van Rossum78fc3631998-03-20 17:37:24 +0000261 co = compile(fp.read()+'\n', pathname, 'exec')
Guido van Rossum912a14c1998-03-05 04:56:37 +0000262 elif type == imp.PY_COMPILED:
263 if fp.read(4) != imp.get_magic():
264 self.msgout(2, "raise ImportError: Bad magic number", pathname)
Guido van Rossumce33eb32000-05-02 13:49:13 +0000265 raise ImportError, "Bad magic number in %s" % pathname
Guido van Rossum912a14c1998-03-05 04:56:37 +0000266 fp.read(4)
267 co = marshal.load(fp)
268 else:
269 co = None
270 m = self.add_module(fqname)
Guido van Rossumab045f91998-03-06 19:55:10 +0000271 m.__file__ = pathname
Guido van Rossum912a14c1998-03-05 04:56:37 +0000272 if co:
Guido van Rossum6b767ac2001-03-20 20:43:34 +0000273 if self.replace_paths:
274 co = self.replace_paths_in_code(co)
Guido van Rossum912a14c1998-03-05 04:56:37 +0000275 m.__code__ = co
Guido van Rossum3c51cf21998-03-05 05:15:07 +0000276 self.scan_code(co, m)
Guido van Rossum912a14c1998-03-05 04:56:37 +0000277 self.msgout(2, "load_module ->", m)
278 return m
Guido van Rossum75dc4961998-03-05 03:42:00 +0000279
Guido van Rossum3c51cf21998-03-05 05:15:07 +0000280 def scan_code(self, co, m):
281 code = co.co_code
282 n = len(code)
283 i = 0
284 lastname = None
285 while i < n:
286 c = code[i]
287 i = i+1
288 op = ord(c)
289 if op >= dis.HAVE_ARGUMENT:
290 oparg = ord(code[i]) + ord(code[i+1])*256
291 i = i+2
292 if op == IMPORT_NAME:
293 name = lastname = co.co_names[oparg]
294 if not self.badmodules.has_key(lastname):
295 try:
296 self.import_hook(name, m)
297 except ImportError, msg:
298 self.msg(2, "ImportError:", str(msg))
Guido van Rossumfdd30281998-12-22 13:44:01 +0000299 if not self.badmodules.has_key(name):
300 self.badmodules[name] = {}
301 self.badmodules[name][m.__name__] = None
Guido van Rossum3c51cf21998-03-05 05:15:07 +0000302 elif op == IMPORT_FROM:
303 name = co.co_names[oparg]
304 assert lastname is not None
305 if not self.badmodules.has_key(lastname):
306 try:
307 self.import_hook(lastname, m, [name])
308 except ImportError, msg:
309 self.msg(2, "ImportError:", str(msg))
310 fullname = lastname + "." + name
Guido van Rossumfdd30281998-12-22 13:44:01 +0000311 if not self.badmodules.has_key(fullname):
312 self.badmodules[fullname] = {}
313 self.badmodules[fullname][m.__name__] = None
Guido van Rossum89990532000-09-15 16:37:42 +0000314 elif op in STORE_OPS:
315 # Skip; each IMPORT_FROM is followed by a STORE_* opcode
316 pass
Guido van Rossum3c51cf21998-03-05 05:15:07 +0000317 else:
318 lastname = None
319 for c in co.co_consts:
320 if isinstance(c, type(co)):
321 self.scan_code(c, m)
322
Guido van Rossum75dc4961998-03-05 03:42:00 +0000323 def load_package(self, fqname, pathname):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000324 self.msgin(2, "load_package", fqname, pathname)
Thomas Hellerc7aaf952002-11-14 18:45:11 +0000325 newname = replacePackageMap.get(fqname)
326 if newname:
327 fqname = newname
Guido van Rossum912a14c1998-03-05 04:56:37 +0000328 m = self.add_module(fqname)
329 m.__file__ = pathname
330 m.__path__ = [pathname]
Guido van Rossumf1b5a0e1998-05-18 20:21:56 +0000331
Guido van Rossume7e632a1998-09-14 16:02:28 +0000332 # As per comment at top of file, simulate runtime __path__ additions.
333 m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
Guido van Rossumf1b5a0e1998-05-18 20:21:56 +0000334
Guido van Rossum912a14c1998-03-05 04:56:37 +0000335 fp, buf, stuff = self.find_module("__init__", m.__path__)
336 self.load_module(fqname, fp, buf, stuff)
337 self.msgout(2, "load_package ->", m)
338 return m
Guido van Rossum75dc4961998-03-05 03:42:00 +0000339
340 def add_module(self, fqname):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000341 if self.modules.has_key(fqname):
342 return self.modules[fqname]
343 self.modules[fqname] = m = Module(fqname)
344 return m
Guido van Rossum75dc4961998-03-05 03:42:00 +0000345
346 def find_module(self, name, path):
Guido van Rossum03f7f082001-10-18 19:15:32 +0000347 if path:
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000348 fullname = '.'.join(path)+'.'+name
Guido van Rossum03f7f082001-10-18 19:15:32 +0000349 else:
350 fullname = name
351 if fullname in self.excludes:
352 self.msgout(3, "find_module -> Excluded", fullname)
Guido van Rossum78fc3631998-03-20 17:37:24 +0000353 raise ImportError, name
354
Guido van Rossum912a14c1998-03-05 04:56:37 +0000355 if path is None:
356 if name in sys.builtin_module_names:
357 return (None, None, ("", "", imp.C_BUILTIN))
Guido van Rossum78fc3631998-03-20 17:37:24 +0000358
Guido van Rossum912a14c1998-03-05 04:56:37 +0000359 path = self.path
360 return imp.find_module(name, path)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000361
362 def report(self):
Guido van Rossum912a14c1998-03-05 04:56:37 +0000363 print
364 print " %-25s %s" % ("Name", "File")
365 print " %-25s %s" % ("----", "----")
366 # Print modules found
367 keys = self.modules.keys()
368 keys.sort()
369 for key in keys:
370 m = self.modules[key]
371 if m.__path__:
372 print "P",
373 else:
374 print "m",
375 print "%-25s" % key, m.__file__ or ""
Guido van Rossum75dc4961998-03-05 03:42:00 +0000376
Guido van Rossum912a14c1998-03-05 04:56:37 +0000377 # Print missing modules
378 keys = self.badmodules.keys()
379 keys.sort()
380 for key in keys:
Thomas Wouters7e474022000-07-16 12:04:32 +0000381 # ... but not if they were explicitly excluded.
Guido van Rossumbaf06031998-08-25 14:06:55 +0000382 if key not in self.excludes:
Guido van Rossum38b92eb1998-12-15 15:35:23 +0000383 mods = self.badmodules[key].keys()
384 mods.sort()
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000385 print "?", key, "from", ', '.join(mods)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000386
Guido van Rossum03f7f082001-10-18 19:15:32 +0000387 def any_missing(self):
388 keys = self.badmodules.keys()
389 missing = []
390 for key in keys:
391 if key not in self.excludes:
392 # Missing, and its not supposed to be
393 missing.append(key)
394 return missing
395
Guido van Rossum6b767ac2001-03-20 20:43:34 +0000396 def replace_paths_in_code(self, co):
397 new_filename = original_filename = os.path.normpath(co.co_filename)
398 for f,r in self.replace_paths:
399 if original_filename.startswith(f):
400 new_filename = r+original_filename[len(f):]
401 break
402
403 if self.debug and original_filename not in self.processed_paths:
404 if new_filename!=original_filename:
405 self.msgout(2, "co_filename %r changed to %r" \
406 % (original_filename,new_filename,))
407 else:
408 self.msgout(2, "co_filename %r remains unchanged" \
409 % (original_filename,))
410 self.processed_paths.append(original_filename)
411
412 consts = list(co.co_consts)
413 for i in range(len(consts)):
414 if isinstance(consts[i], type(co)):
415 consts[i] = self.replace_paths_in_code(consts[i])
416
417 return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
418 co.co_flags, co.co_code, tuple(consts), co.co_names,
419 co.co_varnames, new_filename, co.co_name,
Neal Norwitz82c72312002-11-12 23:09:12 +0000420 co.co_firstlineno, co.co_lnotab,
421 co.co_freevars, co.co_cellvars)
Guido van Rossum6b767ac2001-03-20 20:43:34 +0000422
Guido van Rossum75dc4961998-03-05 03:42:00 +0000423
424def test():
425 # Parse command line
426 import getopt
427 try:
Guido van Rossumbaf06031998-08-25 14:06:55 +0000428 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
Guido van Rossum75dc4961998-03-05 03:42:00 +0000429 except getopt.error, msg:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000430 print msg
431 return
Guido van Rossum75dc4961998-03-05 03:42:00 +0000432
433 # Process options
434 debug = 1
435 domods = 0
436 addpath = []
Guido van Rossumbaf06031998-08-25 14:06:55 +0000437 exclude = []
Guido van Rossum75dc4961998-03-05 03:42:00 +0000438 for o, a in opts:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000439 if o == '-d':
440 debug = debug + 1
441 if o == '-m':
442 domods = 1
443 if o == '-p':
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000444 addpath = addpath + a.split(os.pathsep)
Guido van Rossum912a14c1998-03-05 04:56:37 +0000445 if o == '-q':
446 debug = 0
Guido van Rossumbaf06031998-08-25 14:06:55 +0000447 if o == '-x':
448 exclude.append(a)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000449
450 # Provide default arguments
451 if not args:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000452 script = "hello.py"
Guido van Rossum75dc4961998-03-05 03:42:00 +0000453 else:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000454 script = args[0]
Guido van Rossum75dc4961998-03-05 03:42:00 +0000455
456 # Set the path based on sys.path and the script directory
457 path = sys.path[:]
458 path[0] = os.path.dirname(script)
459 path = addpath + path
460 if debug > 1:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000461 print "path:"
462 for item in path:
463 print " ", `item`
Guido van Rossum75dc4961998-03-05 03:42:00 +0000464
465 # Create the module finder and turn its crank
Guido van Rossumbaf06031998-08-25 14:06:55 +0000466 mf = ModuleFinder(path, debug, exclude)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000467 for arg in args[1:]:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000468 if arg == '-m':
469 domods = 1
470 continue
471 if domods:
472 if arg[-2:] == '.*':
473 mf.import_hook(arg[:-2], None, ["*"])
474 else:
475 mf.import_hook(arg)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000476 else:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000477 mf.load_file(arg)
Guido van Rossum75dc4961998-03-05 03:42:00 +0000478 mf.run_script(script)
479 mf.report()
480
481
482if __name__ == '__main__':
483 try:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000484 test()
Guido van Rossum75dc4961998-03-05 03:42:00 +0000485 except KeyboardInterrupt:
Guido van Rossum912a14c1998-03-05 04:56:37 +0000486 print "\n[interrupt]"