Greg Stein | 281b8d8 | 1999-11-07 12:54:45 +0000 | [diff] [blame] | 1 | # |
| 2 | # imputil.py |
| 3 | # |
| 4 | # Written by Greg Stein. Public Domain. |
| 5 | # No Copyright, no Rights Reserved, and no Warranties. |
| 6 | # |
| 7 | # Utilities to help out with custom import mechanisms. |
| 8 | # |
| 9 | # Additional modifications were contributed by Marc-Andre Lemburg and |
| 10 | # Gordon McMillan. |
| 11 | # |
Greg Stein | d4c64ba | 1999-11-07 13:14:58 +0000 | [diff] [blame] | 12 | # This module is maintained by Greg and is available at: |
| 13 | # http://www.lyra.org/greg/python/imputil.py |
| 14 | # |
| 15 | # Since this isn't in the Python distribution yet, we'll use the CVS ID |
| 16 | # for tracking: |
| 17 | # $Id$ |
| 18 | # |
Greg Stein | 281b8d8 | 1999-11-07 12:54:45 +0000 | [diff] [blame] | 19 | |
| 20 | __version__ = '0.3' |
| 21 | |
| 22 | # note: avoid importing non-builtin modules |
| 23 | import imp |
| 24 | import sys |
| 25 | import strop |
| 26 | import __builtin__ ### why this instead of just using __builtins__ ?? |
| 27 | |
| 28 | # for the DirectoryImporter |
| 29 | import struct |
| 30 | import marshal |
| 31 | |
class Importer:
    """Base class for replacing standard import functions.

    Subclasses override get_code() to locate modules.  Calling install() on
    an instance replaces __builtin__.__import__ and __builtin__.reload with
    this instance's hooks; imports this instance cannot satisfy are passed
    to the hooks that were in place when install() was called.
    """

    def install(self):
        """Install this importer's hooks into the __builtin__ module.

        The previously installed __import__ and reload functions are saved
        on the instance so that _import_hook() and _reload_hook() can
        delegate to them.
        """
        self.__chain_import = __builtin__.__import__
        self.__chain_reload = __builtin__.reload
        __builtin__.__import__ = self._import_hook
        __builtin__.reload = self._reload_hook

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _import_hook(self, name, globals=None, locals=None, fromlist=None):
        """Python calls this hook to locate and import a module.

        This method attempts to load the (dotted) module name. If it cannot
        find it, then it delegates the import to the next import hook in the
        chain (where "next" is defined as the import hook that was in place
        at the time this Importer instance was installed).
        """

        ### insert a fast-path check for whether the module is already
        ### loaded? use a variant of _determine_import_context() which
        ### returns a context regardless of Importer used. generate an
        ### fqname and look in sys.modules for it.

        # determine the context of this import
        parent = self._determine_import_context(globals)

        # import the module within the context, or from the default context
        top, tail = self._import_top_module(parent, name)
        if top is None:
            # the module was not found; delegate to the next import hook
            return self.__chain_import(name, globals, locals, fromlist)

        # the top module may be under the control of a different importer.
        # if so, then defer to that importer for completion of the import.
        # note it may be self, or is undefined so we (self) may as well
        # finish the import.
        importer = top.__dict__.get('__importer__', self)
        return importer._finish_import(top, tail, fromlist)

    def _finish_import(self, top, tail, fromlist):
        """Complete an import once the top-level module has been loaded.

        top is the loaded top-level module, tail is the remaining dotted
        portion of the requested name (possibly empty), and fromlist is the
        "from" list passed to the import hook.

        Returns top when fromlist is empty ("import a.b.c"), otherwise the
        bottom-most module ("from a.b import c, d").
        """
        # if "a.b.c" was provided, then load the ".b.c" portion down from
        # below the top-level module.
        bottom = self._load_tail(top, tail)

        # if the form is "import a.b.c", then return "a"
        if not fromlist:
            # no fromlist: return the top of the import tree
            return top

        # the top module was imported by self, or it was not imported through
        # the Importer mechanism and self is simply handling the import of
        # the sub-modules and fromlist.
        #
        # this means that the bottom module was also imported by self, or we
        # are handling things in the absence of a prior Importer
        #
        # ### why the heck are we handling it? what is the example scenario
        # ### where this happens? note that we can't determine is_package()
        # ### for non-Importer modules.
        #
        # since we imported/handled the bottom module, this means that we can
        # also handle its fromlist (and reliably determine is_package()).

        # if the bottom node is a package, then (potentially) import some
        # modules.
        #
        # note: if it is not a package, then "fromlist" refers to names in
        #       the bottom module rather than modules.
        # note: for a mix of names and modules in the fromlist, we will
        #       import all modules and insert those into the namespace of
        #       the package module. Python will pick up all fromlist names
        #       from the bottom (package) module; some will be modules that
        #       we imported and stored in the namespace, others are expected
        #       to be present already.
        if self._is_package(bottom.__dict__):
            self._import_fromlist(bottom, fromlist)

        # if the form is "from a.b import c, d" then return "b"
        return bottom

    def _reload_hook(self, module):
        """Python calls this hook to reload a module.

        Modules whose __importer__ is not this instance are passed to the
        reload function that was installed before this importer.  Reloading
        one of our own modules is not implemented and raises SystemError.
        """

        # reloading of a module may or may not be possible (depending on the
        # importer), but at least we can validate that it's ours to reload
        importer = module.__dict__.get('__importer__', None)
        if importer is not self:
            return self.__chain_reload(module)

        # okay. it is ours, but we don't know what to do (yet)
        ### we should blast the module dict and do another get_code(). need to
        ### flesh this out and add proper docco...
        raise SystemError("reload not yet implemented")

    def _determine_import_context(self, globals):
        """Returns the context in which a module should be imported.

        The context could be a loaded (package) module and the imported module
        will be looked for within that package. The context could also be None,
        meaning there is no context -- the module should be looked for as a
        "top-level" module.
        """

        if not globals or \
           globals.get('__importer__', None) is not self:
            # globals does not refer to one of our modules or packages.
            # That implies there is no relative import context, and it
            # should just pick it off the standard path.
            return None

        # The globals refer to a module or package of ours. It will define
        # the context of the new import. Get the module/package fqname.
        parent_fqname = globals['__name__']

        # for a package, return itself (imports refer to pkg contents)
        if self._is_package(globals):
            parent = sys.modules[parent_fqname]
            assert globals is parent.__dict__
            return parent

        i = strop.rfind(parent_fqname, '.')

        # a module outside of a package has no particular import context
        if i == -1:
            return None

        # for a module in a package, return the package (imports refer to
        # siblings)
        parent_fqname = parent_fqname[:i]
        parent = sys.modules[parent_fqname]
        assert parent.__name__ == parent_fqname
        return parent

    def _import_top_module(self, parent, name):
        """Locate the top of the import tree (relative or absolute).

        parent defines the context in which the import should occur. See
        _determine_import_context() for details.

        Returns a tuple (module, tail). module is the loaded (top-level) module,
        or None if the module is not found. tail is the remaining portion of
        the dotted name.
        """
        i = strop.find(name, '.')
        if i == -1:
            head = name
            tail = ""
        else:
            head = name[:i]
            tail = name[i+1:]
        if parent:
            fqname = "%s.%s" % (parent.__name__, head)
        else:
            fqname = head
        module = self._import_one(parent, head, fqname)
        if module:
            # the module was relative, or no context existed (the module was
            # simply found on the path).
            return module, tail
        if parent:
            # we tried relative, now try an absolute import (from the path)
            module = self._import_one(None, head, head)
            if module:
                return module, tail

        # the module wasn't found
        return None, None

    def _import_one(self, parent, modname, fqname):
        """Import a single module.

        parent is the containing package module (or None), modname is the
        undotted name within parent, and fqname is the fully-qualified name
        used as the sys.modules key.

        Returns the module (from sys.modules if already loaded), or None if
        get_code() reports that the module was not found.  If executing the
        module's code raises, the module is removed from sys.modules again
        and the exception propagates.
        """

        # has the module already been imported?
        try:
            return sys.modules[fqname]
        except KeyError:
            pass

        # load the module's code, or fetch the module itself
        result = self.get_code(parent, modname, fqname)
        if result is None:
            return None

        # did get_code() return an actual module? (rather than a code object)
        is_module = type(result[1]) is type(sys)

        # use the returned module, or create a new one to exec code into
        if is_module:
            module = result[1]
        else:
            module = imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = result[0]

        # if present, the third item is a set of values to insert into the
        # module
        if len(result) > 2:
            module.__dict__.update(result[2])

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        # execute the code within the module's namespace
        if not is_module:
            try:
                exec(result[1], module.__dict__)
            except:
                # the code failed part-way through: do not leave a
                # half-initialized module registered, otherwise a later
                # import of fqname would silently return it.
                if sys.modules.get(fqname) is module:
                    del sys.modules[fqname]
                raise

        # insert the module into its parent
        if parent:
            setattr(parent, modname, module)
        return module

    def _load_tail(self, m, tail):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules.  Raises
        ImportError if any component of tail cannot be imported.
        """
        if tail:
            for part in strop.splitfields(tail, '.'):
                fqname = "%s.%s" % (m.__name__, part)
                m = self._import_one(m, part, fqname)
                if not m:
                    raise ImportError("No module named " + fqname)
        return m

    def _import_fromlist(self, package, fromlist):
        """Import any sub-modules in the "from" list.

        Names already present as attributes of package are left alone.
        Raises ImportError if a missing name cannot be imported as a
        sub-module.
        """

        # if '*' is present in the fromlist, then look for the '__all__'
        # variable to find additional items (modules) to import.
        if '*' in fromlist:
            fromlist = list(fromlist) + \
                       list(package.__dict__.get('__all__', []))

        for sub in fromlist:
            # if the name is already present, then don't try to import it (it
            # might not be a module!).
            if sub != '*' and not hasattr(package, sub):
                subname = "%s.%s" % (package.__name__, sub)
                submod = self._import_one(package, sub, subname)
                if not submod:
                    raise ImportError("cannot import name " + subname)

    def _is_package(self, module_dict):
        """Determine if a given module (dictionary) specifies a package.

        The package status is in the module-level name __ispkg__. The module
        must also have been imported by self, so that we can reliably apply
        semantic meaning to __ispkg__.

        ### weaken the test to issubclass(Importer)?
        """
        return module_dict.get('__importer__', None) is self and \
               module_dict['__ispkg__']

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing. It
        may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a (potentially)
        dotted name from the "root" of the module namespace down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, a 2-tuple, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 2- or 3-tuple should be the integer 0 or 1,
          specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
          executed within the new module's namespace). This item can also
          be a fully-loaded module object (e.g. loaded from a shared lib).

        * If present, the third item is a dictionary of name/value pairs that
          will be inserted into new module before the code object is executed.
          This is provided in case the module's code expects certain values
          (such as where the module was found). When the second item is a
          module object, then these names/values will be inserted *after* the
          module has been loaded/initialized.
        """
        raise RuntimeError("get_code not implemented")
| 322 | |
| 323 | |
| 324 | ###################################################################### |
| 325 | # |
| 326 | # Simple function-based importer |
| 327 | # |
class FuncImporter(Importer):
    """Importer that delegates module lookup to a caller-supplied function.

    The function is called with the same (parent, modname, fqname) arguments
    as get_code() and its return value is passed back unchanged.
    """

    def __init__(self, func):
        # func: callable taking (parent, modname, fqname)
        self.func = func

    def get_code(self, parent, modname, fqname):
        "Forward the lookup to the stored function and return its result."
        locate = self.func
        return locate(parent, modname, fqname)
| 334 | |
def install_with(func):
    "Wrap func in a FuncImporter and install it as the active import hook."
    importer = FuncImporter(func)
    importer.install()
| 337 | |
| 338 | |
| 339 | ###################################################################### |
| 340 | # |
| 341 | # Base class for archive-based importing |
| 342 | # |
class PackageArchiveImporter(Importer):
    "Importer subclass to import from (file) archives."

    def get_code(self, parent, modname, fqname):
        """Locate a module inside an archive, or the archive itself.

        Without a parent, modname names an archive: get_archive() is asked
        for it and, when found, it is reported as a package whose (empty)
        code carries the archive value in __archive__.

        With a parent, modname is fetched from parent.__archive__ through
        get_subfile() and reported as a plain (non-package) module.

        Returns None when nothing was found.
        """
        if not parent:
            # no parent was provided, so the archive should exist somewhere
            # on the default "path".
            archive = self.get_archive(modname)
            if archive is None:
                return None
            return 1, "", {'__archive__': archive}

        # a parent "package" was provided: we are importing a sub-file from
        # its archive.
        found = self.get_subfile(parent.__archive__, modname)
        if found is None:
            return None
        if type(found) == type(()):
            # (code, values) from get_subfile -> (0, code, values)
            return (0,) + found
        return 0, found

    def get_archive(self, modname):
        """Get an archive of modules.

        Subclasses locate the archive named by modname and return a value
        that get_subfile() can later use to load modules from it. The value
        may be a simple pathname, an open file, or a complex object that
        caches information for future imports.

        Return None if the archive was not found.
        """
        raise RuntimeError("get_archive not implemented")

    def get_subfile(self, archive, modname):
        """Get code from a subfile in the specified archive.

        archive is the value previously returned by get_archive(). Locate
        and return a code object for the module named modname.

        A 2-tuple may be returned instead, consisting of a code object and a
        dict of name/values to place into the target module.

        Return None if the subfile was not found.
        """
        raise RuntimeError("get_subfile not implemented")
| 388 | |
| 389 | |
class PackageArchive(PackageArchiveImporter):
    """PackageArchiveImporter subclass bound to one specific archive.

    Only the module name given at construction time resolves to an archive;
    every other name is reported as not found.
    """

    def __init__(self, modname, archive_pathname):
        self.__path = archive_pathname
        self.__modname = modname

    def get_archive(self, modname):
        "Return the archive pathname if modname is ours, otherwise None."
        matched = modname == self.__modname
        if not matched:
            return None
        return self.__path

    # get_subfile is passed the full pathname of the archive
| 403 | |
| 404 | |
| 405 | ###################################################################### |
| 406 | # |
| 407 | # Emulate the standard directory-based import mechanism |
| 408 | # |
| 409 | |
class DirectoryImporter(Importer):
    """Importer subclass to emulate the standard importer.

    Top-level modules are looked up in the single directory given to the
    constructor. A sub-directory is treated as a package whose code lives
    in its __init__ module; sub-modules of a package are looked up in the
    package's __pkgdir__.
    """

    def __init__(self, dir):
        self.dir = dir
        # suffix of compiled files: 'c' (.pyc) when __debug__ is true,
        # 'o' (.pyo) otherwise
        self.ext_char = __debug__ and 'c' or 'o'
        self.ext = '.py' + self.ext_char

    def get_code(self, parent, modname, fqname):
        """Find modname in the parent's package directory or in self.dir.

        Returns None if neither a source nor a compiled file exists (or if
        only an unusable compiled file exists), otherwise the 3-tuple
        (ispkg, code, values) described by Importer.get_code(). For a
        package, values carries its directory as __pkgdir__.
        """
        if parent:
            dir = parent.__pkgdir__
        else:
            dir = self.dir

        os = self._get_os()

        pathname = os.path.join(dir, modname)
        if os.path.isdir(pathname):
            values = {'__pkgdir__': pathname}
            ispkg = 1
            pathname = os.path.join(pathname, '__init__')
        else:
            values = {}
            ispkg = 0

        t_py = self._timestamp(pathname + '.py')
        t_pyc = self._timestamp(pathname + self.ext)
        if t_py is None and t_pyc is None:
            return None

        code = None
        # prefer the compiled file when there is no source, or when it is
        # at least as new as the source
        if t_py is None or (t_pyc is not None and t_pyc >= t_py):
            code = self._load_compiled(pathname + self.ext, t_py)
        if code is None:
            if t_py is None:
                # only a compiled file exists and it could not be used;
                # there is no source to fall back on.
                return None
            code = self._compile(pathname + '.py', t_py)
        return ispkg, code, values

    def _get_os(self):
        """Return the os module, importing it on first use.

        We don't import os at the top-level, because it isn't a builtin
        module (and we want to defer loading non-builtins until as late as
        possible). The module is cached on the instance as self.os.
        """
        try:
            return self.os
        except AttributeError:
            import os
            self.os = os
            return os

    def _load_compiled(self, pathname, t_py):
        """Load a code object from the compiled file at pathname.

        t_py is the timestamp of the matching source file, or None when
        there is no source. Returns None if the file's magic number does
        not match this interpreter, or if a source exists and the timestamp
        recorded in the file differs from t_py.
        """
        f = open(pathname, 'rb')
        try:
            header = f.read(8)
            if len(header) != 8 or header[:4] != imp.get_magic():
                return None
            stamp = struct.unpack('<I', header[4:])[0]
            # without a source file there is nothing to compare against,
            # so the compiled file is accepted as-is
            if t_py is not None and stamp != t_py:
                return None
            return marshal.load(f)
        finally:
            f.close()

    def _timestamp(self, pathname):
        "Return item 8 of the stat() result for pathname, or None on OSError."
        try:
            s = self._get_os().stat(pathname)
        except OSError:
            return None
        return long(s[8])

    def _compile(self, pathname, timestamp):
        """Compile the source file at pathname and try to cache the result.

        The compiled code is written next to the source (pathname plus
        self.ext_char) on a best-effort basis: failure to write the cache
        file is ignored. Returns the code object.
        """
        f = open(pathname, 'r')
        try:
            codestring = f.read()
        finally:
            f.close()
        if codestring and codestring[-1] != '\n':
            codestring = codestring + '\n'
        code = __builtin__.compile(codestring, pathname, 'exec')

        # try to cache the compiled code. the magic number is written last
        # so that a partially written file is never mistaken for a valid one.
        # open()/write() raise IOError (not OSError) on Python 2, so both
        # must be caught for this to be best-effort.
        try:
            f = open(pathname + self.ext_char, 'wb')
            try:
                f.write('\0\0\0\0')
                f.write(struct.pack('<I', timestamp))
                marshal.dump(code, f)
                f.flush()
                f.seek(0, 0)
                f.write(imp.get_magic())
            finally:
                f.close()
        except (IOError, OSError):
            pass

        return code

    def __repr__(self):
        return '<%s.%s for "%s" at 0x%x>' % (self.__class__.__module__,
                                             self.__class__.__name__,
                                             self.dir,
                                             id(self))
| 491 | |
def _test_dir():
    """Debug/test helper: install a DirectoryImporter per sys.path entry.

    Entries are installed from the last sys.path entry to the first.
    """
    dirs = sys.path[:]
    idx = len(dirs)
    while idx > 0:
        idx = idx - 1
        DirectoryImporter(dirs[idx]).install()
| 498 | |
| 499 | ###################################################################### |