Jingwen Chen | 475b3cc | 2021-01-05 21:45:16 -0500 | [diff] [blame] | 1 | """zipimport provides support for importing Python modules from Zip archives. |
| 2 | |
| 3 | This module exports three objects: |
| 4 | - zipimporter: a class; its constructor takes a path to a Zip archive. |
| 5 | - ZipImportError: exception raised by zipimporter objects. It's a |
| 6 | subclass of ImportError, so it can be caught as ImportError, too. |
| 7 | - _zip_directory_cache: a dict, mapping archive paths to zip directory |
| 8 | info dicts, as used in zipimporter._files. |
| 9 | |
| 10 | It is usually not needed to use the zipimport module explicitly; it is |
| 11 | used by the builtin import mechanism for sys.path items that are paths |
| 12 | to Zip archives. |
| 13 | """ |
| 14 | |
| 15 | #from importlib import _bootstrap_external |
| 16 | #from importlib import _bootstrap # for _verbose_message |
| 17 | import _frozen_importlib_external as _bootstrap_external |
| 18 | from _frozen_importlib_external import _unpack_uint16, _unpack_uint32 |
| 19 | import _frozen_importlib as _bootstrap # for _verbose_message |
| 20 | import _imp # for check_hash_based_pycs |
| 21 | import _io # for open |
| 22 | import marshal # for loads |
| 23 | import sys # for modules |
| 24 | import time # for mktime |
| 25 | |
| 26 | __all__ = ['ZipImportError', 'zipimporter'] |
| 27 | |
| 28 | |
# Primary path separator, taken from the import machinery.
path_sep = _bootstrap_external.path_sep
# Remaining separators accepted by the import machinery (an empty string when
# there are none); incoming paths are normalised by replacing this string
# with path_sep.
alt_path_sep = _bootstrap_external.path_separators[1:]
| 31 | |
| 32 | |
class ZipImportError(ImportError):
    """Raised by this module when an archive cannot be opened, read or searched.

    It derives from ImportError, so handlers written for ImportError catch
    it as well.
    """
| 35 | |
# Cache of _read_directory() results, keyed by archive path.
_zip_directory_cache = dict()

# The type of module objects, obtained from an already-imported module.
_module_type = type(sys)

# Constants describing the "end of central directory" record.
END_CENTRAL_DIR_SIZE = 22             # bytes read when looking for the record
STRING_END_ARCHIVE = b'PK\x05\x06'    # signature expected at its start
MAX_COMMENT_LEN = 0xFFFF              # bound used when scanning past a comment
| 44 | |
class zipimporter:
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' names either a Zip file
    or a location inside one: both '/tmp/myimport.zip' and
    '/tmp/myimport.zip/mydirectory' are accepted, the latter provided that
    mydirectory is a valid directory inside the archive.

    ZipImportError is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute holds the path of the Zip file itself; the
    'prefix' attribute holds the sub-path inside it (empty, or ending with a
    path separator).
    """

    # Peel trailing components off the given path until what remains is a
    # regular file; that file is the archive and the peeled components form
    # the prefix. The archive's directory comes from _zip_directory_cache
    # when present, otherwise it is read from the archive and cached.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Components removed so far, innermost first.
        trailing = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                parent, leaf = _bootstrap_external._path_split(path)
                if parent == path:
                    # Nothing left to strip.
                    raise ZipImportError('not a Zip file', path=path)
                path = parent
                trailing.append(leaf)
                continue
            # The path exists; it must be a regular file (stat.S_ISREG).
            if (st.st_mode & 0o170000) != 0o100000:
                raise ZipImportError('not a Zip file', path=path)
            break

        try:
            toc = _zip_directory_cache[path]
        except KeyError:
            toc = _read_directory(path)
            _zip_directory_cache[path] = toc
        self._files = toc
        self.archive = path
        # The directory inside the archive that follows the Zip file path.
        inner = _bootstrap_external._path_join(*reversed(trailing))
        self.prefix = inner + path_sep if inner else inner


    # Decide whether 'fullname' can be imported from here, or whether it
    # names a directory that may be a portion of a namespace package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. The result is a 2-tuple:
        (self, []) if the module or package was found; (None, [dirpath]) if
        only a matching directory exists, making it a possible portion of a
        namespace package; (None, []) otherwise. The optional 'path'
        argument is ignored -- it's there for compatibility with the
        importer protocol.
        """
        if _get_module_info(self, fullname) is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. Only the last component of
        # fullname matters here; earlier components are in self.prefix.
        modpath = _get_module_path(self, fullname)
        if not _is_dir(self, modpath):
            return None, []
        # Possibly a namespace portion: report its path without a trailing
        # separator.
        return None, [f'{self.archive}{path_sep}{modpath}']


    # Decide whether 'fullname' can be imported from here.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.
        """
        loader, _portions = self.find_loader(fullname, path)
        return loader


    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        return _get_module_code(self, fullname)[0]


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname', which may be given
        either relative to the archive or prefixed with the archive path.
        Raise OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Strip a leading "<archive><sep>" so the key is archive-relative.
        archive_root = self.archive + path_sep
        if pathname.startswith(archive_root):
            key = pathname[len(archive_root):]
        else:
            key = pathname

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module.
        """
        # The filename depends on which archive entry the code would be
        # taken from, so go through the same lookup as a real load.
        return _get_module_code(self, fullname)[2]


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        is_pkg = _get_module_info(self, fullname)
        if is_pkg is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        base = _get_module_path(self, fullname)
        fullpath = (_bootstrap_external._path_join(base, '__init__.py')
                    if is_pkg else f'{base}.py')

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Report whether the named module is a package.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        is_pkg = _get_module_info(self, fullname)
        if is_pkg is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return is_pkg


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        # Reuse an existing module object if sys.modules holds one.
        mod = sys.modules.get(fullname)
        if not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # __path__ must be set *before* the package code runs.
                mod.__path__ = [_bootstrap_external._path_join(
                    self.archive, _get_module_path(self, fullname))]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Drop the half-initialised module on any failure.
            del sys.modules[fullname]
            raise

        # Return whatever sys.modules holds now, not the local object.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            is_pkg = self.is_package(fullname)
        except ZipImportError:
            return None
        if not is_pkg:
            return None
        # Register the reader class with the ABC once, on first use.
        if not _ZipImportResourceReader._registered:
            from importlib.abc import ResourceReader
            ResourceReader.register(_ZipImportResourceReader)
            _ZipImportResourceReader._registered = True
        return _ZipImportResourceReader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
| 292 | |
| 293 | |
# _zip_searchorder lists, in lookup order, the entries tried for a module
# inside the Zip archive: a package __init__ first (.pyc before .py), then a
# plain module (.pyc before .py). Each item is
# (suffix, isbytecode, ispackage).
# NOTE(review): the earlier comment said the .pyc entries are swapped by
# initzipimport() in optimized mode; no such function is visible in this
# file -- confirm before relying on it.
_zip_searchorder = (
    (f'{path_sep}__init__.pyc', True, True),
    (f'{path_sep}__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
| 305 | |
# Map a dotted module name to its candidate path inside the archive, without
# extension: the importer's prefix followed by the last name component.
def _get_module_path(self, fullname):
    leaf = fullname.rsplit('.', 1)[-1]
    return self.prefix + leaf
| 310 | |
# Tell whether 'path' is a "directory" in the archive, which makes it
# eligible to be a portion of a namespace package. A directory is recognised
# by an entry named like the path with a path separator appended.
def _is_dir(self, path):
    return (path + path_sep) in self._files
| 319 | |
# Look the module up using _zip_searchorder. Return True if the first
# matching entry is a package, False if it is a plain module, and None if
# nothing in the archive matches.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    toc = self._files
    for suffix, _isbytecode, ispackage in _zip_searchorder:
        if base + suffix in toc:
            return ispackage
    return None
| 328 | |
| 329 | |
| 330 | # implementation |
| 331 | |
# _read_directory(archive) -> files dict (new reference)
#
# Parse the central directory of the Zip archive at 'archive' and return a
# dict mapping file names (local to the archive, using path_sep as the
# separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__: archive path joined with name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name.
#
# Raises ZipImportError when the archive cannot be opened or read, or when
# its end-of-central-directory data is inconsistent; raises EOFError when a
# central directory entry is cut short.
def _read_directory(archive):
    cant_read = f"can't read Zip file: {archive!r}"

    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # Assume first that there is no archive comment, so the end record
        # is exactly the last END_CENTRAL_DIR_SIZE bytes of the file.
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            eocd_pos = fp.tell()
            eocd = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        if len(eocd) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(cant_read, path=archive)

        if eocd[:4] != STRING_END_ARCHIVE:
            # No signature there: a comment may follow the record. Scan the
            # tail of the file for the last occurrence of the signature.
            try:
                fp.seek(0, 2)
                archive_size = fp.tell()
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            scan_start = max(
                archive_size - MAX_COMMENT_LEN - END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(scan_start)
                tail = fp.read()
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            sig_at = tail.rfind(STRING_END_ARCHIVE)
            if sig_at < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            eocd = tail[sig_at:sig_at + END_CENTRAL_DIR_SIZE]
            if len(eocd) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            eocd_pos = archive_size - len(tail) + sig_at

        # Size and recorded offset of the central directory.
        cd_size = _unpack_uint32(eocd[12:16])
        cd_offset = _unpack_uint32(eocd[16:20])
        if eocd_pos < cd_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if eocd_pos < cd_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # The directory ends where the end record starts. The gap between
        # its real and recorded start is the amount of data that precedes
        # the archive proper; it is added to every stored offset below.
        cd_start = eocd_pos - cd_size
        arc_offset = cd_start - cd_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Number of directory entries seen.
        count = 0
        try:
            fp.seek(cd_start)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        while True:
            entry = fp.read(46)
            if len(entry) < 4:
                raise EOFError('EOF read where not expected')
            # Anything other than a file header signature ends the listing.
            if entry[:4] != b'PK\x01\x02':
                break
            if len(entry) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(entry[8:10])
            compress = _unpack_uint16(entry[10:12])
            dos_time = _unpack_uint16(entry[12:14])
            dos_date = _unpack_uint16(entry[14:16])
            crc = _unpack_uint32(entry[16:20])
            data_size = _unpack_uint32(entry[20:24])
            file_size = _unpack_uint32(entry[24:28])
            name_size = _unpack_uint16(entry[28:30])
            extra_size = _unpack_uint16(entry[30:32])
            comment_size = _unpack_uint16(entry[32:34])
            file_offset = _unpack_uint32(entry[42:46])
            if file_offset > cd_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            if len(name) != name_size:
                raise ZipImportError(cant_read, path=archive)
            # The extra field and comment are not used. They are read rather
            # than seeked over: on Windows, fseek is slower than reading
            # because it flushes stdio's internal buffers. See issue #8745.
            unused_size = extra_size + comment_size
            try:
                unused = fp.read(unused_size)
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            if len(unused) != unused_size:
                raise ZipImportError(cant_read, path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            files[name] = (path, compress, data_size, file_size, file_offset,
                           dos_time, dos_date, crc)
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
| 469 | |
# The encodings package itself may have to be loaded from a ZIP file during
# bootstrap, yet the cp437 codec is implemented in Python inside that very
# package.
#
# To avoid the circular dependency, cp437 file names are decoded with this
# translation table instead: entry i is the character for cp437 byte i.
cp437_table = (
    # ASCII part: the first 128 code points map to themselves
    ''.join(map(chr, range(128))) +
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
| 504 | |
# True while _get_decompress_func() is in the middle of importing zlib.
_importing_zlib = False

# Return the zlib.decompress function object. Raise ZipImportError if zlib
# couldn't be imported, or if this function is re-entered while its own
# import of zlib is still in progress.
def _get_decompress_func():
    global _importing_zlib
    unavailable = "can't decompress data; zlib not available"

    if _importing_zlib:
        # Someone has a zlib.py[co] in their Zip file
        # let's avoid a stack overflow.
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)

    _importing_zlib = True
    try:
        from zlib import decompress
    except Exception:
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)
    finally:
        # Clear the re-entrancy guard on success and on failure alike.
        _importing_zlib = False

    _bootstrap._verbose_message('zipimport: zlib available')
    return decompress
| 529 | |
# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
#
# Raises ZipImportError for a negative data size, a failed seek, a bad local
# file header or unavailable zlib; EOFError if the local header is cut
# short; OSError if fewer data bytes than recorded could be read.
def _get_data(archive, toc_entry):
    # Only three of the eight toc fields are needed here.
    (_datapath, compress, data_size, _file_size,
     file_offset, _time, _date, _crc) = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    cant_read = f"can't read Zip file: {archive!r}"
    with _io.open_code(archive) as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The data follows the fixed 30-byte header, the name and the extra
        # field.
        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        data_start = file_offset + 30 + name_size + extra_size
        try:
            fp.seek(data_start)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        raw_data = fp.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress == 0:
        # data is not compressed
        return raw_data

    # Decompress with zlib; -15 is passed as zlib's second (wbits) argument.
    try:
        decompress = _get_decompress_func()
    except Exception:
        raise ZipImportError("can't decompress data; zlib not available")
    return decompress(raw_data, -15)
| 572 | |
| 573 | |
# Lenient comparison of two modification times. The mtime kept in the
# archive is coarser than the one stored in a .pyc (dostime only stores
# even seconds), so values up to one second apart count as equal.
def _eq_mtime(t1, t2):
    return -1 <= t1 - t2 <= 1
| 580 | |
| 581 | |
# Given the contents of a .py[co] file, unmarshal the data and return the
# code object. Return None if the pyc header is rejected, or if the recorded
# hash or mtime/size does not match the source in the archive (None rather
# than an exception, because the caller falls back to .py if available and
# other errors must not be masked). Raise TypeError if the payload
# unmarshals to something other than a code object.
def _unmarshal_code(self, pathname, fullpath, fullname, data):
    exc_details = dict(name=fullname, path=fullpath)

    try:
        flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
    except ImportError:
        return None

    if flags & 0b1:
        # Hash-based pyc: validate against the source only when the
        # check_hash_based_pycs policy and the pyc's check_source flag ask
        # for it.
        policy = _imp.check_hash_based_pycs
        check_source = bool(flags & 0b10)
        if policy != 'never' and (check_source or policy == 'always'):
            source_bytes = _get_pyc_source(self, fullpath)
            if source_bytes is not None:
                source_hash = _imp.source_hash(
                    _bootstrap_external._RAW_MAGIC_NUMBER,
                    source_bytes,
                )
                try:
                    _bootstrap_external._validate_hash_pyc(
                        data, source_hash, fullname, exc_details)
                except ImportError:
                    return None
    else:
        # Timestamp-based pyc: compare against the source's mtime and size.
        source_mtime, source_size = \
            _get_mtime_and_size_of_source(self, fullpath)

        if source_mtime:
            # We don't use _bootstrap_external._validate_timestamp_pyc
            # to allow for a more lenient timestamp check.
            pyc_mtime = _unpack_uint32(data[8:12])
            stale = (not _eq_mtime(pyc_mtime, source_mtime) or
                     _unpack_uint32(data[12:16]) != source_size)
            if stale:
                _bootstrap._verbose_message(
                    f'bytecode is stale for {fullname!r}')
                return None

    # The marshalled code follows the 16-byte header.
    code = marshal.loads(data[16:])
    if not isinstance(code, _code_type):
        raise TypeError(f'compiled module {pathname!r} is not a code object')
    return code

_code_type = type(_unmarshal_code.__code__)
| 634 | |
| 635 | |
# Convert DOS ('\r\n') and Mac ('\r') line endings in the given bytes to
# Unix ('\n') line endings. The two-byte sequence is handled first so that
# it yields a single newline.
def _normalize_line_endings(source):
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
| 642 | |
# Compile a buffer of Python source into a module code object, after
# normalizing its line endings. 'pathname' is recorded as the code's
# filename.
def _compile_source(pathname, source):
    normalized = _normalize_line_endings(source)
    return compile(normalized, pathname, 'exec', dont_inherit=True)
| 648 | |
# Convert the DOS date ('d') and time ('t') values found in the Zip archive
# to a value that's compatible with the time stamp stored in .pyc files,
# i.e. the result of time.mktime() on the decoded fields.
def _parse_dostime(d, t):
    year = (d >> 9) + 1980        # date bits 9..15: years since 1980
    month = (d >> 5) & 0xF        # date bits 5..8: month
    day = d & 0x1F                # date bits 0..4: day
    hours = t >> 11               # time bits 11..15: hours
    minutes = (t >> 5) & 0x3F     # time bits 5..10: minutes
    seconds = (t & 0x1F) * 2      # time bits 0..4: seconds / 2
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
| 660 | |
# Given the archive path of a .pyc file, return a pair holding the
# modification time of the matching .py file (converted with
# _parse_dostime) and its uncompressed size, or (0, 0) if no source is
# available.
def _get_mtime_and_size_of_source(self, path):
    try:
        # Dropping the trailing 'c' or 'o' of *.py[co] names the source.
        assert path[-1:] in ('c', 'o')
        source_entry = self._files[path[:-1]]
        # toc fields used: 5 = dos time, 6 = dos date, 3 = decompressed size.
        dos_time, dos_date, source_size = (
            source_entry[5], source_entry[6], source_entry[3])
        return _parse_dostime(dos_date, dos_time), source_size
    except (KeyError, IndexError, TypeError):
        return 0, 0
| 678 | |
| 679 | |
# Given the archive path of a .pyc file, return the contents of the
# matching .py file, or None if the archive holds no such source.
def _get_pyc_source(self, path):
    # Dropping the trailing 'c' or 'o' of *.py[co] names the source.
    assert path[-1:] in ('c', 'o')
    try:
        source_entry = self._files[path[:-1]]
    except KeyError:
        return None
    return _get_data(self.archive, source_entry)
| 694 | |
| 695 | |
# Get the code object associated with the module specified by 'fullname'.
# Candidates are tried in _zip_searchorder order. Returns the tuple
# (code, ispackage, modpath), where modpath is the __file__ value of the
# archive entry used. Raises ZipImportError if no candidate yields code.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            code = _unmarshal_code(self, modpath, fullpath, fullname, data)
        else:
            code = _compile_source(modpath, data)
        if code is None:
            # bad magic number or non-matching mtime
            # in byte code, try next
            continue
        return code, ispackage, modpath

    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
| 722 | |
| 723 | |
class _ZipImportResourceReader:
    """Private class used to support ZipImport.get_resource_reader().

    This class is allowed to reference all the innards and private parts of
    the zipimporter.
    """
    # Set to True by zipimporter.get_resource_reader() once the class has
    # been registered with importlib.abc.ResourceReader.
    _registered = False

    def __init__(self, zipimporter, fullname):
        # 'zipimporter' is the importer that owns the archive; 'fullname' is
        # the dotted name of the package whose resources are served.
        self.zipimporter = zipimporter
        self.fullname = fullname

    def open_resource(self, resource):
        """Return a binary file-like object holding the data of 'resource'.

        Raise FileNotFoundError if the archive has no such entry.
        """
        fullname_as_path = self.fullname.replace('.', '/')
        path = f'{fullname_as_path}/{resource}'
        from io import BytesIO
        try:
            return BytesIO(self.zipimporter.get_data(path))
        except OSError:
            raise FileNotFoundError(path)

    def resource_path(self, resource):
        """Always raise FileNotFoundError: resources have no file system path."""
        # All resources are in the zip file, so there is no path to the file.
        # Raising FileNotFoundError tells the higher level API to extract the
        # binary data and create a temporary file.
        raise FileNotFoundError

    def is_resource(self, name):
        """Return True if data for 'name' can be read from the package."""
        # Maybe we could do better, but if we can get the data, it's a
        # resource. Otherwise it isn't.
        fullname_as_path = self.fullname.replace('.', '/')
        path = f'{fullname_as_path}/{name}'
        try:
            self.zipimporter.get_data(path)
        except OSError:
            return False
        return True

    def contents(self):
        """Yield the names of the package's direct children, each once.

        Both files and subdirectories are reported. An entry nested deeper
        in the package is represented by the name of the top-level
        subdirectory that contains it.
        """
        # fullname is a module path, but _files is keyed by file names
        # relative to the top of the archive. So we turn the package into a
        # directory path relative to the archive and then look through
        # _files for names inside that directory.
        from pathlib import Path
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # fullname names a package, so its file is the package's __init__
        # module. That is __init__.pyc rather than __init__.py when the
        # bytecode entry is the one the importer uses.
        assert relative_path.name in ('__init__.py', '__init__.pyc')
        package_path = relative_path.parent
        seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                # Not inside this package.
                continue
            parts = relative.parts
            if not parts:
                # The directory entry of the package itself.
                continue
            # The first component is the direct child, whether the entry is
            # a file in the package or lies somewhere below a subdirectory.
            child = parts[0]
            if child not in seen:
                seen.add(child)
                yield child