blob: 42cbbab038b257fd2a6e37eb48c81027e4cf3f46 [file] [log] [blame]
Guido van Rossum75dc4961998-03-05 03:42:00 +00001"""Find modules used by a script, using introspection."""
2
3import dis
4import imp
5import marshal
6import os
7import re
8import string
9import sys
Guido van Rossum6b767ac2001-03-20 20:43:34 +000010import new
Guido van Rossum75dc4961998-03-05 03:42:00 +000011
# Numeric opcodes that scan_code() looks for, resolved by name through the
# dis module so they follow whatever interpreter is running.
IMPORT_NAME, IMPORT_FROM = map(dis.opname.index,
                               ('IMPORT_NAME', 'IMPORT_FROM'))

# The store opcodes; scan_code() skips over them so that the name of the
# module being imported from is remembered across "from X import a, b".
STORE_OPS = list(map(dis.opname.index,
                     ('STORE_NAME', 'STORE_FAST', 'STORE_GLOBAL')))
STORE_NAME, STORE_FAST, STORE_GLOBAL = STORE_OPS
Guido van Rossum75dc4961998-03-05 03:42:00 +000018
# Modulefinder does a good job at simulating Python's import mechanism, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.
Guido van Rossumf1b5a0e1998-05-18 20:21:56 +000023
# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}


# Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra __path__ entry for *packagename*.

    The path is appended to the list kept for that package in
    packagePathMap; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
Guido van Rossum75dc4961998-03-05 03:42:00 +000032
class Module:
    """Plain record describing one module or package.

    Attributes:
        __name__: the name given to the constructor.
        __file__: the file the module came from, or None.
        __path__: the package search path, or None.
        __code__: always None after construction; meant to be assigned later.
    """

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None

    def __repr__(self):
        """Return "Module(name[, file[, path]])", omitting fields that are None."""
        text = "Module(%s" % repr(self.__name__)
        for extra in (self.__file__, self.__path__):
            if extra is not None:
                text += ", %s" % repr(extra)
        return text + ")"
Guido van Rossum75dc4961998-03-05 03:42:00 +000049
50
class ModuleFinder:
    """Find the modules a script uses by scanning bytecode for imports.

    Modules are located with imp.find_module(), compiled (or unmarshalled
    from a compiled file) and their code objects are searched for import
    opcodes; this class never executes the code it loads.

    Attributes:
        path: directories searched for top-level modules.
        modules: maps fully qualified module name -> Module object.
        badmodules: maps the name of an import that failed -> dict whose
            keys are the names of the modules that attempted it.
        debug: verbosity threshold consulted by msg().
        indent: current nesting depth of the trace output.
        excludes: module names that find_module() refuses to locate.
        replace_paths: (old_prefix, new_prefix) pairs applied to
            co_filename by replace_paths_in_code().
        processed_paths: filenames already reported by
            replace_paths_in_code(); used in debugging only.
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Create an empty finder.

        path          -- list of directories to search; sys.path when None
        debug         -- msg() prints messages whose level is <= this value
        excludes      -- list of module names that must not be located
        replace_paths -- list of (old_prefix, new_prefix) filename rewrites
        """
        if path is None:
            path = sys.path
        # None sentinels (rather than [] defaults) give every instance its
        # own list, so one finder can never leak state into another.
        if excludes is None:
            excludes = []
        if replace_paths is None:
            replace_paths = []
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes
        self.replace_paths = replace_paths
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Print a trace line if *level* does not exceed self.debug.

        The line consists of one " " marker per indent level, the message
        and the repr() of every extra argument, separated by single spaces.
        Note that the parameter name shadows the builtin str in this method.
        """
        if level <= self.debug:
            fields = [" "] * self.indent
            fields.append("%s" % (str,))
            for arg in args:
                fields.append(repr(arg))
            print(" ".join(fields))

    def msgin(self, *args):
        """Like msg(), but first indent one level deeper.

        args[0] is the level; all arguments are passed on to msg().
        """
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first undo one level of indentation.

        args[0] is the level; all arguments are passed on to msg().
        """
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the source file *pathname* as the module '__main__'."""
        self.msg(2, "run_script", pathname)
        fp = open(pathname)
        try:
            stuff = ("", "r", imp.PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)
        finally:
            # The file is only needed while it is being compiled.
            fp.close()

    def load_file(self, pathname):
        """Load the source file *pathname* under its base name (no extension)."""
        dirname, name = os.path.split(pathname)
        name, ext = os.path.splitext(name)
        fp = open(pathname)
        try:
            stuff = (ext, "r", imp.PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)
        finally:
            fp.close()

    def import_hook(self, name, caller=None, fromlist=None):
        """Simulate "import name" / "from name import fromlist" done by *caller*.

        Returns the head package of the dotted name when *fromlist* is empty
        and None otherwise.  ImportError raised by the helpers propagates.
        """
        self.msg(3, "import_hook", name, caller, fromlist)
        parent = self.determine_parent(caller)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        # Only packages (objects with a non-empty __path__) can have
        # submodules to pull in for the from-list.
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller):
        """Return the package relative to which *caller*'s imports resolve.

        That is *caller* itself when it has a __path__, the module registered
        under the dotted prefix of its name when the name contains a dot,
        and None otherwise (including when *caller* is false).
        """
        self.msgin(4, "determine_parent", caller)
        if not caller:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            i = pname.rfind('.')
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first component of the dotted *name*.

        The component is first tried relative to *parent* (if any) and then
        as a top-level module.  Returns (module, rest_of_name); raises
        ImportError when neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        if '.' in name:
            i = name.find('.')
            head = name[:i]
            tail = name[i+1:]
        else:
            head = name
            tail = ""
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # The relative import failed; retry as an absolute import.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining component of *tail* below module *q*.

        Returns the module for the last component (or *q* when *tail* is
        empty); raises ImportError for the first component not found.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            i = tail.find('.')
            if i < 0:
                i = len(tail)
            head, tail = tail[:i], tail[i+1:]
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is not yet an attribute of *m*.

        "*" expands to the result of find_all_submodules(m); *recursive* is
        set on that nested call so that "*" is expanded only once.  Raises
        ImportError for a name that cannot be imported as a submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """List the module names found in the directories of m.__path__.

        A name is the file name minus a ".py", ".pyc" or ".pyo" suffix;
        "__init__" is skipped and each name is reported once.  Directories
        that cannot be listed are skipped.  Returns None when *m* has no
        __path__.
        """
        if not m.__path__:
            return None
        modules = {}
        suffixes = [".py", ".pyc", ".pyo"]
        for dirname in m.__path__:
            try:
                names = os.listdir(dirname)
            except os.error:
                self.msg(2, "can't list directory", dirname)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    n = len(suff)
                    if name[-n:] == suff:
                        mod = name[:-n]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return list(modules.keys())

    def _add_badmodule(self, name, caller):
        """Record that *caller* tried, and failed, to import *name*."""
        if name not in self.badmodules:
            self.badmodules[name] = {}
        self.badmodules[name][caller.__name__] = None

    def import_module(self, partname, fqname, parent):
        """Return the module *fqname*, loading it on first use.

        *partname* is the last component of *fqname* and *parent* the
        containing package (or None).  Returns None, rather than raising,
        when the module is already known to be bad or cannot be found.
        A newly loaded module is also set as attribute *partname* of
        *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            if parent:
                self._add_badmodule(fqname, parent)
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None
        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code for further imports.

        *file_info* is the (suffix, mode, type) triple as produced by
        find_module().  Packages are delegated to load_package(); source is
        compiled; compiled files are checked against imp.get_magic() and
        unmarshalled; any other type is registered without code.  Raises
        ImportError for a compiled file with the wrong magic number.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == imp.PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == imp.PY_SOURCE:
            co = compile(fp.read()+'\n', pathname, 'exec')
        elif kind == imp.PY_COMPILED:
            if fp.read(4) != imp.get_magic():
                self.msgout(2, "raise ImportError: Bad magic number", pathname)
                raise ImportError("Bad magic number in %s" % pathname)
            # Skip the four header bytes that follow the magic number.
            fp.read(4)
            co = marshal.load(fp)
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def scan_code(self, co, m):
        """Follow every import found in code object *co* of module *m*.

        The bytecode is walked opcode by opcode, reading a two-byte
        little-endian operand for opcodes >= dis.HAVE_ARGUMENT.  Imports
        that fail are recorded in self.badmodules.  Code objects among the
        constants are scanned recursively.
        """
        code = co.co_code
        n = len(code)
        i = 0
        lastname = None
        while i < n:
            c = code[i]
            i = i+1
            op = ord(c)
            if op >= dis.HAVE_ARGUMENT:
                oparg = ord(code[i]) + ord(code[i+1])*256
                i = i+2
            if op == IMPORT_NAME:
                name = lastname = co.co_names[oparg]
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(name, m)
                    except ImportError:
                        self.msg(2, "ImportError:", str(sys.exc_info()[1]))
                        self._add_badmodule(name, m)
            elif op == IMPORT_FROM:
                name = co.co_names[oparg]
                assert lastname is not None
                if lastname not in self.badmodules:
                    try:
                        self.import_hook(lastname, m, [name])
                    except ImportError:
                        self.msg(2, "ImportError:", str(sys.exc_info()[1]))
                        fullname = lastname + "." + name
                        self._add_badmodule(fullname, m)
            elif op in STORE_OPS:
                # Skip; each IMPORT_FROM is followed by a STORE_* opcode
                pass
            else:
                lastname = None
        for const in co.co_consts:
            if isinstance(const, type(co)):
                self.scan_code(const, m)

    def load_package(self, fqname, pathname):
        """Register the package directory *pathname* as *fqname*.

        The package's __path__ is the directory plus any paths registered
        for it in packagePathMap; its "__init__" module is then located on
        that path and loaded under the package name.
        """
        self.msgin(2, "load_package", fqname, pathname)
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
        finally:
            # find_module() hands back an open file for __init__; close it
            # here because nobody else holds a reference to it.
            if fp:
                fp.close()
        self.msgout(2, "load_package ->", m)
        return m

    def add_module(self, fqname):
        """Return the Module registered as *fqname*, creating it if needed."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path):
        """Locate module *name* and return (file, pathname, description).

        Excluded names raise ImportError.  When *path* is None, builtin
        modules and (on win32) modules registered in the registry are
        tried first, then self.path is searched; otherwise *path* is
        searched.  The actual search is done by imp.find_module(), whose
        ImportError propagates.
        """
        if name in self.excludes:
            # Plain msg(): no msgin() was issued by this method, so using
            # msgout() here would unbalance the trace indentation.
            self.msg(3, "find_module -> Excluded")
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", imp.C_BUILTIN))

            # Emulate the Registered Module support on Windows.
            if sys.platform == "win32":
                import _winreg
                from _winreg import HKEY_LOCAL_MACHINE
                try:
                    # QueryValue() returns the default value of the sub key
                    # as a string; QueryValueEx() would look for a *named
                    # value* on HKLM itself and return a (value, type) pair.
                    pathname = _winreg.QueryValue(
                        HKEY_LOCAL_MACHINE,
                        "Software\\Python\\PythonCore\\%s\\Modules\\%s"
                        % (sys.winver, name))
                    fp = open(pathname, "rb")
                    # XXX - To do - remove the hard code of C_EXTENSION.
                    stuff = "", "rb", imp.C_EXTENSION
                    return fp, pathname, stuff
                except (_winreg.error, IOError):
                    # Not registered, or the registered file is unreadable:
                    # fall back to the normal path search.
                    pass

            path = self.path
        return imp.find_module(name, path)

    def report(self):
        """Print the modules found and the imports that could not be resolved.

        Found modules are listed in sorted order, flagged "P" when they have
        a __path__ and "m" otherwise.  Missing modules are flagged "?" with
        the names of the modules that tried to import them; explicitly
        excluded names are not listed.
        """
        print("")
        # Two leading blanks so the headings line up with the columns that
        # follow the one-letter flag and its separating space.
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
        # Print modules found
        keys = list(self.modules.keys())
        keys.sort()
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                flag = "P"
            else:
                flag = "m"
            print("%s %-25s %s" % (flag, key, m.__file__ or ""))

        # Print missing modules
        keys = list(self.badmodules.keys())
        keys.sort()
        for key in keys:
            # ... but not if they were explicitly excluded.
            if key not in self.excludes:
                mods = list(self.badmodules[key].keys())
                mods.sort()
                print("? %s from %s" % (key, ', '.join(mods)))

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with co_filename rewritten.

        The first (old_prefix, new_prefix) pair in self.replace_paths whose
        old prefix starts the normalised filename is applied.  Code objects
        among the constants are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            # msg(), not msgout(): this method never called msgin().
            if new_filename != original_filename:
                self.msg(2, "co_filename %r changed to %r"
                            % (original_filename, new_filename,))
            else:
                self.msg(2, "co_filename %r remains unchanged"
                            % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = list(co.co_consts)
        for i in range(len(consts)):
            if isinstance(consts[i], type(co)):
                consts[i] = self.replace_paths_in_code(consts[i])

        # Pass the free and cell variable names along as well; without them
        # the copy of a code object that uses nested scopes is broken.
        return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
                        co.co_flags, co.co_code, tuple(consts), co.co_names,
                        co.co_varnames, new_filename, co.co_name,
                        co.co_firstlineno, co.co_lnotab,
                        co.co_freevars, co.co_cellvars)
402
Guido van Rossum75dc4961998-03-05 03:42:00 +0000403
def test():
    """Command-line driver: analyse a script and print the module report.

    Options (parsed from sys.argv[1:]):
        -d        increase the debug level by one (it starts at 1)
        -m        treat the remaining extra arguments as module names
        -p PATH   prepend the os.pathsep-separated directories in PATH
        -q        set the debug level to 0
        -x NAME   exclude module NAME

    The first positional argument is the script ("hello.py" when absent).
    Each further argument is loaded as a file, or imported as a module once
    "-m" has been seen; a module argument ending in ".*" imports all
    submodules of the package.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error:
        print(sys.exc_info()[1])
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath.extend(value.split(os.pathsep))
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    if args:
        script = args[0]
    else:
        script = "hello.py"

    # Set the path based on sys.path and the script directory
    search_path = list(sys.path)
    search_path[0] = os.path.dirname(script)
    search_path = addpath + search_path
    if debug > 1:
        print("path:")
        for entry in search_path:
            # Same output as printing " " and the repr as two items.
            print(" ".join((" ", repr(entry))))

    # Create the module finder and turn its crank
    mf = ModuleFinder(search_path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
        elif not domods:
            mf.load_file(arg)
        elif arg[-2:] == '.*':
            mf.import_hook(arg[:-2], None, ["*"])
        else:
            mf.import_hook(arg)
    mf.run_script(script)
    mf.report()
460
461
# Script entry point: run the command-line driver and turn Ctrl-C into a
# short notice instead of a traceback.
if __name__ == '__main__':
    try:
        test()
    except KeyboardInterrupt:
        sys.stdout.write("\n[interrupt]" + "\n")