Serhiy Storchaka | 79d1c2e | 2018-09-18 22:22:29 +0300 | [diff] [blame^] | 1 | """zipimport provides support for importing Python modules from Zip archives. |
| 2 | |
| 3 | This module exports three objects: |
| 4 | - zipimporter: a class; its constructor takes a path to a Zip archive. |
| 5 | - ZipImportError: exception raised by zipimporter objects. It's a |
| 6 | subclass of ImportError, so it can be caught as ImportError, too. |
| 7 | - _zip_directory_cache: a dict, mapping archive paths to zip directory |
| 8 | info dicts, as used in zipimporter._files. |
| 9 | |
| 10 | It is usually not needed to use the zipimport module explicitly; it is |
| 11 | used by the builtin import mechanism for sys.path items that are paths |
| 12 | to Zip archives. |
| 13 | """ |
| 14 | |
| 15 | #from importlib import _bootstrap_external |
| 16 | #from importlib import _bootstrap # for _verbose_message |
| 17 | import _frozen_importlib_external as _bootstrap_external |
| 18 | from _frozen_importlib_external import _unpack_uint16, _unpack_uint32 |
| 19 | import _frozen_importlib as _bootstrap # for _verbose_message |
| 20 | import _imp # for check_hash_based_pycs |
| 21 | import _io # for open |
| 22 | import marshal # for loads |
| 23 | import sys # for modules |
| 24 | import time # for mktime |
| 25 | |
| 26 | __all__ = ['ZipImportError', 'zipimporter'] |
| 27 | |
| 28 | |
# Separators used when normalising and composing paths in this module.
# path_sep is the primary separator; alt_path_sep is whatever follows the
# first character of path_separators (an empty string when there is none).
path_sep = _bootstrap_external.path_sep
alt_path_sep = _bootstrap_external.path_separators[1:]
| 31 | |
| 32 | |
class ZipImportError(ImportError):
    """Exception raised by the zipimport machinery.

    It is a subclass of ImportError, so it can be caught as ImportError too.
    """
| 35 | |
# Cache of _read_directory() results, keyed by archive path.
_zip_directory_cache = dict()

# The module type, obtained from an already-imported module.
_module_type = type(sys)
| 40 | |
| 41 | |
class zipimporter:
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components that lie *inside* the archive, innermost first.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions)

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. A 2-tuple is returned:

        - (self, []) if the module or package was found;
        - (None, [dirpath]) if no module was found but a matching directory
          entry exists in the archive, so it is possibly a portion of a
          namespace package ('dirpath' has no trailing separator);
        - (None, []) otherwise.

        The optional 'path' argument is ignored -- it's there for
        compatibility with the importer protocol.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname;
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []


    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.
        """
        return self.find_loader(fullname, path)[0]


    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. 'pathname' may be given
        relative to the archive or prefixed with the archive path. Raise
        OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Keys of self._files are archive-relative, so strip a leading
        # "<archive><sep>" if the caller passed a full path.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            # a package: the source lives in its __init__.py
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # The executed code may already have removed the entry itself;
            # a plain 'del' would then raise KeyError and hide the real
            # failure, so remove it only if it is still there.
            sys.modules.pop(fullname, None)
            raise

        # The module's code may have replaced its own sys.modules entry,
        # so hand back whatever is registered now.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        from importlib import resources
        return resources._zipimport_get_resource_reader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
| 281 | |
| 282 | |
# _zip_searchorder defines how we search for a module in the Zip
# archive, as (suffix, isbytecode, ispackage) triples: a package
# __init__ is tried first, then a non-package module, and for each of
# those the .pyc entry is tried before the .py entry.
_zip_searchorder = (
    (f'{path_sep}__init__.pyc', True, True),
    (f'{path_sep}__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
| 294 | |
| 295 | # Given a module name, return the potential file path in the |
| 296 | # archive (without extension). |
def _get_module_path(self, fullname):
    """Return the extension-less archive path a module would live at.

    Only the last dotted component of 'fullname' is used; it is appended
    to self.prefix.
    """
    _parents, _dot, leaf = fullname.rpartition('.')
    return self.prefix + leaf
| 299 | |
| 300 | # Does this path represent a directory? |
def _is_dir(self, path):
    """Return True if 'path' names a directory entry in the archive.

    A directory is recognised by the presence of 'path' followed by a
    path separator among the keys of self._files; such a directory is
    eligible to be a portion of a namespace package.
    """
    return (path + path_sep) in self._files
| 308 | |
| 309 | # Return some information about a module. |
def _get_module_info(self, fullname):
    """Report whether 'fullname' is present in the archive, and as what.

    Returns the 'ispackage' flag of the first _zip_searchorder entry whose
    file exists in self._files (True for a package, False for a plain
    module), or None if no candidate file exists.
    """
    base = _get_module_path(self, fullname)
    known_files = self._files
    for suffix, _isbytecode, ispackage in _zip_searchorder:
        if base + suffix in known_files:
            return ispackage
    return None
| 317 | |
| 318 | |
| 319 | # implementation |
| 320 | |
| 321 | # _read_directory(archive) -> files dict (new reference) |
| 322 | # |
| 323 | # Given a path to a Zip archive, build a dict, mapping file names |
| 324 | # (local to the archive, using SEP as a separator) to toc entries. |
| 325 | # |
| 326 | # A toc_entry is a tuple: |
| 327 | # |
| 328 | # (__file__, # value to use for __file__, available for all files, |
| 329 | # # encoded to the filesystem encoding |
| 330 | # compress, # compression kind; 0 for uncompressed |
| 331 | # data_size, # size of compressed data on disk |
| 332 | # file_size, # size of decompressed data |
| 333 | # file_offset, # offset of file header from start of archive |
| 334 | # time, # mod time of file (in dos format) |
#   date,         # mod date of file (in dos format)
| 336 | # crc, # crc checksum of the data |
| 337 | # ) |
| 338 | # |
| 339 | # Directories can be recognized by the trailing path_sep in the name, |
| 340 | # data_size and file_offset are 0. |
def _read_directory(archive):
    """Read the central directory of the Zip file at 'archive'.

    Returns a dict mapping each entry name (with '/' replaced by path_sep)
    to its toc entry tuple:
    (path, compress, data_size, file_size, file_offset, time, date, crc).

    Raises ZipImportError if the file cannot be opened or read, or if the
    end-of-central-directory record or the offsets it declares are
    invalid. Raises EOFError if the file ends in the middle of a central
    directory entry header.
    """
    try:
        fp = _io.open(archive, 'rb')
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    cant_read = f"can't read Zip file: {archive!r}"

    with fp:
        # The end-of-central-directory record is expected in the last
        # 22 bytes of the file.
        try:
            fp.seek(-22, 2)
            eocd_position = fp.tell()
            eocd = fp.read(22)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        if len(eocd) != 22:
            raise ZipImportError(cant_read, path=archive)
        if eocd[:4] != b'PK\x05\x06':
            # Bad: End of Central Dir signature
            raise ZipImportError(f'not a Zip file: {archive!r}', path=archive)

        directory_size = _unpack_uint32(eocd[12:16])
        directory_offset = _unpack_uint32(eocd[16:20])
        if eocd_position < directory_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if eocd_position < directory_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        directory_start = eocd_position - directory_size
        # Difference between where the central directory actually starts
        # and where the record says it starts; added to every local
        # header offset below.
        arc_offset = directory_start - directory_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        count = 0
        # Start of Central Directory
        try:
            fp.seek(directory_start)
        except OSError:
            raise ZipImportError(cant_read, path=archive)

        while True:
            entry = fp.read(46)
            if len(entry) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if entry[:4] != b'PK\x01\x02':
                # Not a central directory file header: end of the listing.
                break
            if len(entry) != 46:
                raise EOFError('EOF read where not expected')

            flags = _unpack_uint16(entry[8:10])
            compress = _unpack_uint16(entry[10:12])
            dos_time = _unpack_uint16(entry[12:14])
            dos_date = _unpack_uint16(entry[14:16])
            crc = _unpack_uint32(entry[16:20])
            data_size = _unpack_uint32(entry[20:24])
            file_size = _unpack_uint32(entry[24:28])
            name_size = _unpack_uint16(entry[28:30])
            extra_size = _unpack_uint16(entry[30:32])
            comment_size = _unpack_uint16(entry[32:34])
            file_offset = _unpack_uint32(entry[42:46])
            # Bytes following the name that belong to this entry.
            skip_size = extra_size + comment_size
            if file_offset > directory_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(cant_read, path=archive)
            if len(name) != name_size:
                raise ZipImportError(cant_read, path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(skip_size)) != skip_size:
                    raise ZipImportError(cant_read, path=archive)
            except OSError:
                raise ZipImportError(cant_read, path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            files[name] = (path, compress, data_size, file_size,
                           file_offset, dos_time, dos_date, crc)
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
| 435 | |
| 436 | # During bootstrap, we may need to load the encodings |
| 437 | # package from a ZIP file. But the cp437 encoding is implemented |
| 438 | # in Python in the encodings package. |
| 439 | # |
| 440 | # Break out of this dependency by using the translation table for |
| 441 | # the cp437 encoding. |
cp437_table = (
    # ASCII part: code points 0x00..0x7f map to themselves
    ''.join(map(chr, range(128))) +
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
| 470 | |
# True while _get_decompress_func() is in the middle of importing zlib.
_importing_zlib = False

# Return the zlib.decompress function object, or raise ZipImportError
# if zlib couldn't be imported. The _importing_zlib flag guards against
# re-entering this function while the zlib import is still in progress.
def _get_decompress_func():
    global _importing_zlib
    no_zlib = "can't decompress data; zlib not available"

    if _importing_zlib:
        # Someone has a zlib.py[co] in their Zip file
        # let's avoid a stack overflow.
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(no_zlib)

    _importing_zlib = True
    try:
        from zlib import decompress as inflate
    except Exception:
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(no_zlib)
    finally:
        # Always clear the guard, whether or not the import worked.
        _importing_zlib = False

    _bootstrap._verbose_message('zipimport: zlib available')
    return inflate
| 495 | |
| 496 | # Given a path to a Zip file and a toc_entry, return the (uncompressed) data. |
def _get_data(archive, toc_entry):
    """Return the (uncompressed) data of one archive member.

    'archive' is the path of the Zip file and 'toc_entry' an 8-tuple as
    built by _read_directory(). Raises ZipImportError for a negative data
    size, a failed seek, a bad local file header or unavailable zlib;
    EOFError if the local header is truncated; OSError if fewer than
    data_size bytes could be read.
    """
    (_datapath, compress, data_size, _file_size,
     file_offset, _time, _date, _crc) = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    cant_read = f"can't read Zip file: {archive!r}"

    with _io.open(archive, 'rb') as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        # The data starts after the fixed 30-byte header, the name and
        # the extra field.
        data_start = file_offset + 30 + name_size + extra_size
        try:
            fp.seek(data_start)
        except OSError:
            raise ZipImportError(cant_read, path=archive)
        raw_data = fp.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress == 0:
        # data is not compressed
        return raw_data

    # Decompress with zlib
    try:
        inflate = _get_decompress_func()
    except Exception:
        raise ZipImportError("can't decompress data; zlib not available")
    return inflate(raw_data, -15)
| 538 | |
| 539 | |
| 540 | # Lenient date/time comparison function. The precision of the mtime |
| 541 | # in the archive is lower than the mtime stored in a .pyc: we |
| 542 | # must allow a difference of at most one second. |
def _eq_mtime(t1, t2):
    """Return True if the two time stamps differ by at most one second."""
    # dostime only stores even seconds, so be lenient
    return -1 <= t1 - t2 <= 1
| 546 | |
# Given the contents of a .py[co] file, unmarshal the data
# and return the code object. Return None if the magic word or the
# time stamp doesn't match (we do this instead of raising an exception
# as we fall back to .py if available and we don't want to mask other
# errors).
def _unmarshal_code(pathname, data, mtime):
    """Turn the bytes of a .pyc file into a code object.

    'pathname' is used only in messages. 'mtime' is the source time stamp
    to compare against, with 0 meaning "don't compare".

    Returns None (so the caller can try an alternative) when the magic
    number doesn't match, when a hash-based pyc is refused, or when the
    embedded time stamp doesn't match 'mtime'. Raises ZipImportError if
    'data' is shorter than the 16-byte header and TypeError if the
    unmarshalled object is not a code object.
    """
    if len(data) < 16:
        raise ZipImportError('bad pyc data')

    if data[:4] != _bootstrap_external.MAGIC_NUMBER:
        _bootstrap._verbose_message('{!r} has bad magic', pathname)
        return None  # signal caller to try alternative

    flags = _unpack_uint32(data[4:8])
    if flags == 0:
        # Time-stamp based pyc: compare against the source mtime, if known.
        if mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):
            _bootstrap._verbose_message('{!r} has bad mtime', pathname)
            return None  # signal caller to try alternative
    else:
        # Hash-based pyc. We currently refuse to handle checked hash-based
        # pycs. We could validate hash-based pycs against the source, but it
        # seems likely that most people putting hash-based pycs in a zipfile
        # will use unchecked ones.
        policy = _imp.check_hash_based_pycs
        if policy != 'never' and (flags != 0x1 or policy == 'always'):
            return None

    # XXX the pyc's size field is ignored; timestamp collisions are probably
    # unimportant with zip files.
    loaded = marshal.loads(data[16:])
    if not isinstance(loaded, _code_type):
        raise TypeError(f'compiled module {pathname!r} is not a code object')
    return loaded

# The type of code objects, taken from a function defined in this module.
_code_type = type(_unmarshal_code.__code__)
| 580 | |
| 581 | |
# Replace every '\r\n' pair and every remaining lone '\r' in the input
# bytes with '\n'.
# This converts DOS and Mac line endings to Unix line endings.
def _normalize_line_endings(source):
    """Return the bytes 'source' with '\\r\\n' and lone '\\r' turned into '\\n'."""
    # Handle the two-byte sequence first so it yields a single newline.
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
| 588 | |
| 589 | # Given a string buffer containing Python source code, compile it |
| 590 | # and return a code object. |
def _compile_source(pathname, source):
    """Compile module source and return the resulting code object.

    Line endings in 'source' are normalised first; 'pathname' is passed
    to compile() as the file name.
    """
    normalized = _normalize_line_endings(source)
    return compile(normalized, pathname, 'exec', dont_inherit=True)
| 594 | |
| 595 | # Convert the date/time values found in the Zip archive to a value |
| 596 | # that's compatible with the time stamp stored in .pyc files. |
def _parse_dostime(d, t):
    """Convert a DOS date word 'd' and time word 't' to a time.mktime() value."""
    year = (d >> 9) + 1980       # bits 9..15: years since 1980
    month = (d >> 5) & 0xF       # bits 5..8: month
    day = d & 0x1F               # bits 0..4: day
    hours = t >> 11              # bits 11..15: hours
    minutes = (t >> 5) & 0x3F    # bits 5..10: minutes
    seconds = (t & 0x1F) * 2     # bits 0..4: seconds / 2
    # The three trailing -1 values are passed through to mktime() as-is.
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
| 606 | |
| 607 | # Given a path to a .pyc file in the archive, return the |
| 608 | # modification time of the matching .py file, or 0 if no source |
| 609 | # is available. |
def _get_mtime_of_source(self, path):
    """Return the modification time of the .py file matching a .py[co] path.

    'path' is the archive path of the bytecode file. Returns 0 if the
    archive has no entry for the source file or its toc entry cannot be
    used.
    """
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        source_entry = self._files[path[:-1]]
        # fetch the time stamp of the .py file for comparison
        # with an embedded pyc time stamp
        dos_time = source_entry[5]
        dos_date = source_entry[6]
        return _parse_dostime(dos_date, dos_time)
    except (KeyError, IndexError, TypeError):
        return 0
| 623 | |
| 624 | # Get the code object associated with the module specified by |
| 625 | # 'fullname'. |
def _get_module_code(self, fullname):
    """Locate 'fullname' in the archive and return its code.

    Candidates are tried in _zip_searchorder order. Returns a tuple
    (code, ispackage, modpath) where 'modpath' is the first item of the
    matching toc entry. A bytecode candidate for which _unmarshal_code()
    returns None is skipped in favour of the next candidate. Raises
    ZipImportError if no candidate yields a code object.
    """
    base = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        candidate = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, candidate, verbosity=2)
        try:
            toc_entry = self._files[candidate]
        except KeyError:
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            mtime = _get_mtime_of_source(self, candidate)
            code = _unmarshal_code(modpath, data, mtime)
        else:
            code = _compile_source(modpath, data)
        if code is not None:
            return code, ispackage, modpath
        # bad magic number or non-matching mtime
        # in byte code, try next

    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)