Serhiy Storchaka | 79d1c2e | 2018-09-18 22:22:29 +0300 | [diff] [blame] | 1 | """zipimport provides support for importing Python modules from Zip archives. |
| 2 | |
| 3 | This module exports three objects: |
| 4 | - zipimporter: a class; its constructor takes a path to a Zip archive. |
| 5 | - ZipImportError: exception raised by zipimporter objects. It's a |
| 6 | subclass of ImportError, so it can be caught as ImportError, too. |
| 7 | - _zip_directory_cache: a dict, mapping archive paths to zip directory |
| 8 | info dicts, as used in zipimporter._files. |
| 9 | |
| 10 | It is usually not needed to use the zipimport module explicitly; it is |
| 11 | used by the builtin import mechanism for sys.path items that are paths |
| 12 | to Zip archives. |
| 13 | """ |
| 14 | |
| 15 | #from importlib import _bootstrap_external |
| 16 | #from importlib import _bootstrap # for _verbose_message |
| 17 | import _frozen_importlib_external as _bootstrap_external |
| 18 | from _frozen_importlib_external import _unpack_uint16, _unpack_uint32 |
| 19 | import _frozen_importlib as _bootstrap # for _verbose_message |
| 20 | import _imp # for check_hash_based_pycs |
| 21 | import _io # for open |
| 22 | import marshal # for loads |
| 23 | import sys # for modules |
| 24 | import time # for mktime |
Brett Cannon | 2de5097 | 2020-12-04 15:39:21 -0800 | [diff] [blame] | 25 | import _warnings # For warn() |
Serhiy Storchaka | 79d1c2e | 2018-09-18 22:22:29 +0300 | [diff] [blame] | 26 | |
# Names exported by ``from zipimport import *``.
__all__ = ['ZipImportError', 'zipimporter']


# Primary path separator, taken from the bootstrap import machinery.
path_sep = _bootstrap_external.path_sep
# Every other accepted separator; this may be an empty string, which is
# why users of it test its truthiness before calling str.replace().
alt_path_sep = _bootstrap_external.path_separators[1:]
| 32 | |
| 33 | |
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects.

    It is a subclass of ImportError, so it can be caught as ImportError, too.
    """
    pass
| 36 | |
# _read_directory() cache: maps an archive path to the files dict that
# _read_directory() built for it.
_zip_directory_cache = {}

# The type of module objects, obtained from an already imported module.
_module_type = type(sys)

# Number of bytes _read_directory() reads for the End of Central Directory
# record (the record without any trailing archive comment).
END_CENTRAL_DIR_SIZE = 22
# Signature that starts the End of Central Directory record.
STRING_END_ARCHIVE = b'PK\x05\x06'
# Longest archive comment _read_directory() searches back over when the
# record is not found at the very end of the file.
MAX_COMMENT_LEN = (1 << 16) - 1
Serhiy Storchaka | 79d1c2e | 2018-09-18 22:22:29 +0300 | [diff] [blame] | 45 | |
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, look up the
    # archive's file directory in _zip_directory_cache, or else read it
    # from the archive and cache it.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components stripped from the right while searching for the
        # archive file itself (collected innermost first).
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package. Return a (loader, portions) pair: (self, []) if we can
    # load it, (None, [path]) if it's a possible namespace portion,
    # (None, []) if we can't load it.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> self, str or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, a string containing the
        full path name if it's possibly a portion of a namespace package,
        or None otherwise. The optional 'path' argument is ignored -- it's
        there for compatibility with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []


    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        return self.find_loader(fullname, path)[0]

    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Return the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be imported.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. Raise OSError if
        the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Accept both archive-relative names and full paths that start
        # with the archive path.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module or raise ZipImportError
        if it couldn't be imported.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it could not be imported.

        Deprecated since Python 3.10. Use exec_module() instead.
        """
        msg = ("zipimport.zipimporter.load_module() is deprecated and slated for "
               "removal in Python 3.12; use exec_module() instead")
        _warnings.warn(msg, DeprecationWarning)
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # The executed code may already have removed its own entry from
            # sys.modules; a plain ``del`` would then raise KeyError and
            # hide the exception that actually aborted the import.
            sys.modules.pop(fullname, None)
            raise

        # The module's code may have replaced its own sys.modules entry;
        # return whatever is registered now.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        from importlib.readers import ZipReader
        return ZipReader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
| 327 | |
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries.
#
# Each entry is a (suffix, isbytecode, ispackage) tuple; that is the
# order in which _get_module_info() unpacks it. Package suffixes start
# with path_sep because they name a file inside the package directory.
#
# NOTE(review): the earlier comment said the .pyc entries are swapped by
# initzipimport() in optimized mode and that '/' is replaced by path_sep
# there. No initzipimport() is visible in this part of the file -- confirm
# whether that sentence is stale before relying on it.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
| 339 | |
| 340 | # Given a module name, return the potential file path in the |
| 341 | # archive (without extension). |
| 342 | def _get_module_path(self, fullname): |
| 343 | return self.prefix + fullname.rpartition('.')[2] |
| 344 | |
# Tell whether 'path' names a "directory" in the archive, which makes it
# eligible to be a portion of a namespace package. A directory is
# recorded in self._files under its name plus a trailing path separator.
def _is_dir(self, path):
    return (path + path_sep) in self._files
| 353 | |
# Look a module up in the archive. Return True if it is a package, False
# if it is a plain module, and None if no candidate file exists. The
# first matching suffix in _zip_searchorder wins.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    toc = self._files
    return next(
        (ispackage
         for suffix, _isbytecode, ispackage in _zip_searchorder
         if base + suffix in toc),
        None,
    )
| 362 | |
| 363 | |
| 364 | # implementation |
| 365 | |
# _read_directory(archive) -> files dict
#
# Open the Zip archive at path 'archive', locate its End of Central
# Directory record (searching back over a trailing archive comment if
# necessary) and walk the central directory. The result is a dict mapping
# file names (local to the archive, using path_sep as a separator) to
# toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__: archive path joined with
#                 # the entry name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name.
#
# ZipImportError is raised when the archive cannot be opened or read or
# when its directory records are inconsistent; EOFError is raised when a
# central directory entry is cut short.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        cant_read = f"can't read Zip file: {archive!r}"

        # First guess: there is no archive comment, so the record occupies
        # exactly the last END_CENTRAL_DIR_SIZE bytes of the file.
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            eocd_pos = fp.tell()
            eocd = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        if len(eocd) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(cant_read, path=archive)

        if eocd[:4] != STRING_END_ARCHIVE:
            # No signature at that position: a comment may follow the
            # record, so search the tail of the file for the last
            # occurrence of the signature.
            try:
                fp.seek(0, 2)
                archive_size = fp.tell()
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            search_start = max(
                archive_size - MAX_COMMENT_LEN - END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(search_start)
                tail = fp.read()
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            sig_index = tail.rfind(STRING_END_ARCHIVE)
            if sig_index < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            eocd = tail[sig_index:sig_index + END_CENTRAL_DIR_SIZE]
            if len(eocd) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            eocd_pos = archive_size - len(tail) + sig_index

        cd_size = _unpack_uint32(eocd[12:16])
        cd_offset = _unpack_uint32(eocd[16:20])
        if eocd_pos < cd_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if eocd_pos < cd_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # The central directory ends where the end record starts.
        cd_start = eocd_pos - cd_size
        # Difference between where the directory really is and where the
        # record says it is; added to every local header offset below.
        arc_offset = cd_start - cd_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        count = 0
        # Start of Central Directory
        try:
            fp.seek(cd_start)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        while True:
            entry = fp.read(46)
            if len(entry) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if entry[:4] != b'PK\x01\x02':
                # Not a central directory file header: the walk is over.
                break
            if len(entry) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(entry[8:10])
            compress = _unpack_uint16(entry[10:12])
            dos_time = _unpack_uint16(entry[12:14])
            dos_date = _unpack_uint16(entry[14:16])
            crc = _unpack_uint32(entry[16:20])
            data_size = _unpack_uint32(entry[20:24])
            file_size = _unpack_uint32(entry[24:28])
            name_size = _unpack_uint16(entry[28:30])
            extra_size = _unpack_uint16(entry[30:32])
            comment_size = _unpack_uint16(entry[32:34])
            file_offset = _unpack_uint32(entry[42:46])
            variable_size = name_size + extra_size + comment_size
            if file_offset > cd_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            if len(name) != name_size:
                raise ZipImportError(cant_read, path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers. See issue #8745.
            skipped = variable_size - name_size
            try:
                if len(fp.read(skipped)) != skipped:
                    raise ZipImportError(cant_read, path=archive)
            except OSError:
                raise ZipImportError(cant_read, path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            files[name] = (path, compress, data_size, file_size,
                           file_offset, dos_time, dos_date, crc)
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
| 503 | |
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# The table is a str indexed by byte value. _read_directory() decodes a
# non-ASCII entry name as latin1 and then passes this table to
# str.translate(), so each code point is replaced by the character at
# that position in the table.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
| 538 | |
# True only while _get_decompress_func() is in the middle of importing
# zlib; used to detect re-entrant calls.
_importing_zlib = False

# Return the zlib.decompress function object. ZipImportError is raised
# if zlib cannot be imported, or if this function is re-entered while the
# import is still in progress (which happens when the import of zlib is
# itself served from a compressed Zip entry).
def _get_decompress_func():
    global _importing_zlib
    unavailable = "can't decompress data; zlib not available"

    if _importing_zlib:
        # Someone has a zlib.py[co] in their Zip file
        # let's avoid a stack overflow.
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)

    _importing_zlib = True
    try:
        from zlib import decompress
    except Exception:
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)
    finally:
        # Always drop the guard, whether or not the import worked.
        _importing_zlib = False

    _bootstrap._verbose_message('zipimport: zlib available')
    return decompress
| 563 | |
# Given a path to a Zip file and a toc_entry, return the (uncompressed)
# data of that entry as bytes.
#
# The local file header at the recorded offset is checked first; its name
# and extra fields are skipped to reach the data. Entries whose
# compression kind is not 0 are inflated with zlib (raw deflate stream).
def _get_data(archive, toc_entry):
    (_datapath, compress, data_size, _file_size,
     file_offset, _time, _date, _crc) = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open_code(archive) as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        # Start of file data: just past the fixed header and both
        # variable-length fields.
        data_start = file_offset + 30 + name_size + extra_size
        try:
            fp.seek(data_start)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        raw_data = fp.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress != 0:
        # Decompress with zlib
        try:
            decompress = _get_decompress_func()
        except Exception:
            raise ZipImportError("can't decompress data; zlib not available")
        # Negative wbits: the data is a raw deflate stream with no zlib
        # header or trailer.
        return decompress(raw_data, -15)

    # data is not compressed
    return raw_data
| 606 | |
| 607 | |
| 608 | # Lenient date/time comparison function. The precision of the mtime |
| 609 | # in the archive is lower than the mtime stored in a .pyc: we |
| 610 | # must allow a difference of at most one second. |
| 611 | def _eq_mtime(t1, t2): |
| 612 | # dostime only stores even seconds, so be lenient |
| 613 | return abs(t1 - t2) <= 1 |
| 614 | |
Elvis Pranskevichus | a6e956b | 2018-11-07 13:34:59 -0500 | [diff] [blame] | 615 | |
# Given the contents of a .py[co] file, unmarshal the data
# and return the code object.
#
# The pyc header is classified first. A hash-based pyc is validated
# against the matching source when the check_hash_based_pycs policy asks
# for it and the source is present in the archive. A timestamp-based pyc
# is compared with the source's mtime and size when a source is present;
# if they disagree, the bytecode is stale and None is returned.
#
# Raises ImportError if the magic word doesn't match, or if the recorded
# .py[co] metadata does not match the source (both reported by the
# _bootstrap_external helpers), and TypeError if the unmarshalled object
# is not a code object.
def _unmarshal_code(self, pathname, fullpath, fullname, data):
    exc_details = {'name': fullname, 'path': fullpath}

    flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)

    if flags & 0b1 != 0:
        # Hash-based pyc.
        check_source = flags & 0b10 != 0
        policy = _imp.check_hash_based_pycs
        if policy != 'never' and (check_source or policy == 'always'):
            source_bytes = _get_pyc_source(self, fullpath)
            if source_bytes is not None:
                source_hash = _imp.source_hash(
                    _bootstrap_external._RAW_MAGIC_NUMBER,
                    source_bytes,
                )
                _bootstrap_external._validate_hash_pyc(
                    data, source_hash, fullname, exc_details)
    else:
        # Timestamp-based pyc.
        source_mtime, source_size = _get_mtime_and_size_of_source(
            self, fullpath)

        if source_mtime:
            # We don't use _bootstrap_external._validate_timestamp_pyc
            # to allow for a more lenient timestamp check.
            is_stale = (
                not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime)
                or _unpack_uint32(data[12:16]) != source_size
            )
            if is_stale:
                _bootstrap._verbose_message(
                    f'bytecode is stale for {fullname!r}')
                return None

    # The marshalled code object follows the 16-byte header.
    code = marshal.loads(data[16:])
    if not isinstance(code, _code_type):
        raise TypeError(f'compiled module {pathname!r} is not a code object')
    return code

# The type of code objects, taken from a function defined in this module.
_code_type = type(_unmarshal_code.__code__)
| 660 | |
| 661 | |
| 662 | # Replace any occurrences of '\r\n?' in the input string with '\n'. |
| 663 | # This converts DOS and Mac line endings to Unix line endings. |
| 664 | def _normalize_line_endings(source): |
| 665 | source = source.replace(b'\r\n', b'\n') |
| 666 | source = source.replace(b'\r', b'\n') |
| 667 | return source |
| 668 | |
# Compile a buffer of Python source code, after normalizing its line
# endings, and return the resulting code object. 'pathname' is the file
# name recorded in the code object.
def _compile_source(pathname, source):
    normalized = _normalize_line_endings(source)
    return compile(normalized, pathname, 'exec', dont_inherit=True)
| 674 | |
# Convert the packed date ('d') and time ('t') values found in the Zip
# archive to a value that's compatible with the time stamp stored in
# .pyc files.
def _parse_dostime(d, t):
    year = (d >> 9) + 1980      # date bits 9..15: years since 1980
    month = (d >> 5) & 0xF      # date bits 5..8: month
    day = d & 0x1F              # date bits 0..4: day
    hours = t >> 11             # time bits 11..15: hours
    minutes = (t >> 5) & 0x3F   # time bits 5..10: minutes
    seconds = (t & 0x1F) * 2    # time bits 0..4: seconds / 2
    # The last three struct_time fields (weekday, day of year, DST flag)
    # are passed as -1.
    return time.mktime(
        (year, month, day, hours, minutes, seconds, -1, -1, -1))
| 686 | |
# Given a path to a .pyc file in the archive, return a pair made of the
# modification time of the matching .py file and its uncompressed size,
# or (0, 0) if no source is available.
def _get_mtime_and_size_of_source(self, path):
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        source_entry = self._files[path[:-1]]
        # The time and date fields of the .py entry are what gets
        # compared with the time stamp embedded in the pyc.
        dos_time = source_entry[5]
        dos_date = source_entry[6]
        source_size = source_entry[3]
        return _parse_dostime(dos_date, dos_time), source_size
    except (KeyError, IndexError, TypeError):
        # Missing or malformed directory entry: report "no source".
        return 0, 0
| 704 | |
| 705 | |
# Given a path to a .pyc file in the archive, return the contents of the
# matching .py file, or None if the archive directory has no entry for it.
def _get_pyc_source(self, path):
    # strip 'c' or 'o' from *.py[co]
    assert path[-1:] in ('c', 'o')
    source_path = path[:-1]

    try:
        source_entry = self._files[source_path]
    except KeyError:
        return None
    return _get_data(self.archive, source_entry)
| 720 | |
Serhiy Storchaka | 79d1c2e | 2018-09-18 22:22:29 +0300 | [diff] [blame] | 721 | |
# Get the code object associated with the module specified by
# 'fullname'.  The entries of _zip_searchorder are tried in order; for the
# first archive member that yields a code object, the tuple
# (code, ispackage, modpath) is returned.  If none does, ZipImportError is
# raised, chained to the last ImportError seen while loading bytecode (if
# there was one).
def _get_module_code(self, fullname):
    base_path = _get_module_path(self, fullname)
    last_import_error = None
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base_path + suffix
        _bootstrap._verbose_message(
            'trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # No such member in the archive, try the next suffix.
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            try:
                code = _unmarshal_code(self, modpath, fullpath, fullname, data)
            except ImportError as exc:
                # Remember the failure but keep searching; it is only
                # reported if no later candidate can be loaded.
                last_import_error = exc
                code = None
        else:
            code = _compile_source(modpath, data)

        if code is not None:
            return code, ispackage, modpath
        # bad magic number or non-matching mtime
        # in byte code, try next

    # Reaching this point means no candidate produced a code object.
    if not last_import_error:
        raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
    msg = f"module load failed: {last_import_error}"
    raise ZipImportError(msg, name=fullname) from last_import_error