blob: e88d7a2c5a724a19c5e52f5e90a18fa334607b86 [file] [log] [blame]
"""zipimport provides support for importing Python modules from Zip archives.

This module exports three objects:
- zipimporter: a class; its constructor takes a path to a Zip archive.
- ZipImportError: exception raised by zipimporter objects. It's a
  subclass of ImportError, so it can be caught as ImportError, too.
- _zip_directory_cache: a dict, mapping archive paths to zip directory
  info dicts, as used in zipimporter._files.

It is usually not needed to use the zipimport module explicitly; it is
used by the builtin import mechanism for sys.path items that are paths
to Zip archives.
"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
Brett Cannon2de50972020-12-04 15:39:21 -080025import _warnings # For warn()
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030026
# Names exported by "from zipimport import *".
__all__ = ['ZipImportError', 'zipimporter']


# path_sep is the primary path separator.  alt_path_sep holds whatever
# follows the first entry of path_separators; it may be empty, which is
# why users of it test its truthiness before replacing.
path_sep = _bootstrap_external.path_sep
alt_path_sep = _bootstrap_external.path_separators[1:]
32
33
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects; a subclass of ImportError."""


# Cache of directory listings produced by _read_directory(), keyed by
# archive path.
_zip_directory_cache = {}

# The module type, obtained from an already imported module.
_module_type = type(sys)

# Layout of the "end of central directory" record.
STRING_END_ARCHIVE = b'PK\x05\x06'   # record signature
END_CENTRAL_DIR_SIZE = 22            # record size, excluding any comment
MAX_COMMENT_LEN = 0xFFFF             # longest possible trailing comment
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030045
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Walk up the path until something that exists is found; the
        # components stripped on the way form the in-archive prefix.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep

    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package. Returns a (loader, portions) pair.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. Returns (self, []) if the
        module was found, (None, [path]) with the full path name if it's
        possibly a portion of a namespace package, or (None, []) otherwise.
        The optional 'path' argument is ignored -- it's there for
        compatibility with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname;
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []

    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        return self.find_loader(fullname, path)[0]

    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname;
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Record the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be imported.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code

    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. Raise OSError if
        the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Accept both archive-relative names and names that start with
        # the archive path itself.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)

    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module or raise ZipImportError
        if it couldn't be imported.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath

    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()

    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi

    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it could not be imported.

        Deprecated since Python 3.10. Use exec_module() instead.
        """
        msg = ("zipimport.zipimporter.load_module() is deprecated and slated for "
               "removal in Python 3.12; use exec_module() instead")
        _warnings.warn(msg, DeprecationWarning)
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Drop the half-initialised module.  pop() rather than del: the
            # executed code may already have removed the entry, and a
            # KeyError here would bury the exception being propagated.
            sys.modules.pop(fullname, None)
            raise

        # The executed code may have replaced its own sys.modules entry.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod

    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        from importlib.readers import ZipReader
        return ZipReader(self, fullname)

    def invalidate_caches(self):
        """Reload the file data of the archive path."""
        try:
            self._files = _read_directory(self.archive)
            _zip_directory_cache[self.archive] = self._files
        except ZipImportError:
            _zip_directory_cache.pop(self.archive, None)
            # Keep a mapping, not None: every lookup helper indexes or
            # tests membership in self._files, so an empty dict makes a
            # vanished archive look empty instead of raising TypeError.
            self._files = {}

    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
336
337
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries.  Within each pair the .pyc entry
# is tried before the .py entry.
#
# Each entry is a (suffix, isbytecode, ispackage) tuple; the suffix is
# appended to the module's path inside the archive.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
349
# Map a (possibly dotted) module name to its candidate path inside the
# archive, without any extension: the importer's prefix followed by the
# last component of the name.
def _get_module_path(self, fullname):
    _parent, _dot, tail = fullname.rpartition('.')
    return self.prefix + tail
354
# Tell whether 'path' names a directory entry of the archive.
def _is_dir(self, path):
    # Directory entries are stored with a trailing path separator, so
    # the path is a directory (and thus eligible to be a portion of a
    # namespace package) exactly when that spelling is a known name.
    return (path + path_sep) in self._files
363
# Look the module up in the archive directory.  Returns True for a
# package, False for a plain module and None when no candidate file
# from _zip_searchorder exists.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    for suffix, _isbytecode, ispackage in _zip_searchorder:
        if base + suffix in self._files:
            return ispackage
    return None
372
373
374# implementation
375
# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using path_sep as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,        # value to use for __file__, available for all files:
#                   # the archive path joined with the entry name
#  compress,        # compression kind; 0 for uncompressed
#  data_size,       # size of compressed data on disk
#  file_size,       # size of decompressed data
#  file_offset,     # offset of file header from start of archive
#  time,            # mod time of file (in dos format)
#  date,            # mod date of file (in dos format)
#  crc,             # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError when the archive cannot be opened or read, or
# when the end-of-central-directory data is inconsistent, and EOFError
# when a central directory file header is truncated.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # First guess: the end-of-central-directory record sits in the
        # last END_CENTRAL_DIR_SIZE bytes (i.e. there is no comment).
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            header_position = fp.tell()
            buffer = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Only the tail that could hold a maximum-length comment plus
            # the record itself needs to be searched for the signature.
            max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                    END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            pos = data.rfind(STRING_END_ARCHIVE)
            if pos < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            # Translate the position inside 'data' back to a file offset.
            header_position = file_size - len(data) + pos

        # Size and recorded offset of the central directory.
        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # The central directory ends where the end record begins, so it
        # actually starts header_size bytes earlier.  The difference to
        # the recorded offset (arc_offset) is later added to every local
        # header offset.
        header_position -= header_size
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Fixed-size part of a central directory file header.
            buffer = fp.read(46)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            time = _unpack_uint16(buffer[12:14])
            date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            # From here on header_size is the length of the variable part
            # (name, extra field, comment) that follows the fixed header.
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    # Decode byte-for-byte, then map through the cp437 table.
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
            files[name] = t
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
513
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# The table is a 256-character string: it is used with str.translate()
# on a name decoded as latin1, so the character at index N is the
# replacement for byte value N.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
548
# True only while _get_decompress_func() is in the middle of importing
# zlib; used to detect re-entrant calls.
_importing_zlib = False


# Import zlib and return its decompress function.  Raises ZipImportError
# when zlib cannot be imported, or when the import is already in
# progress (a re-entrant call).  Nothing is cached here: every call
# performs the import statement again.
def _get_decompress_func():
    global _importing_zlib
    unavailable = "can't decompress data; zlib not available"

    if _importing_zlib:
        # Someone has a zlib.py[co] in their Zip file;
        # bail out now to avoid a stack overflow.
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)

    _importing_zlib = True
    try:
        from zlib import decompress
    except Exception:
        _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
        raise ZipImportError(unavailable)
    finally:
        # Always clear the guard, whether or not the import worked.
        _importing_zlib = False

    _bootstrap._verbose_message('zipimport: zlib available')
    return decompress
573
# Read one member of the Zip file at 'archive' and return its
# (uncompressed) bytes.  'toc_entry' is an 8-tuple as built by
# _read_directory(); only the compression kind, the stored size and the
# local header offset are used here.
#
# Raises ZipImportError for a negative size, a failed seek, a bad local
# header signature or unavailable zlib; EOFError for a truncated local
# header; OSError when fewer bytes than expected could be read.
def _get_data(archive, toc_entry):
    (_datapath, compress, data_size, _file_size,
     file_offset, _time, _date, _crc) = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open_code(archive) as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The file data follows the fixed 30-byte header, the file name
        # and the extra field.
        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        data_start = file_offset + 30 + name_size + extra_size
        try:
            fp.seek(data_start)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        raw_data = fp.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress == 0:
        # data is not compressed
        return raw_data

    # Decompress with zlib (raw deflate stream, hence wbits=-15)
    try:
        decompress = _get_decompress_func()
    except Exception:
        raise ZipImportError("can't decompress data; zlib not available")
    return decompress(raw_data, -15)
616
617
# Lenient comparison of two modification times.  The mtime kept in the
# archive is less precise than the one stored in a .pyc (dostime only
# stores even seconds), so values at most one second apart count as
# equal.
def _eq_mtime(t1, t2):
    delta = t1 - t2
    return -1 <= delta <= 1
624
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500625
# Given the contents of a .pyc file, validate its header and unmarshal
# the code object.
#
# Hash-based pycs are checked against the archived source when the flags
# or _imp.check_hash_based_pycs ask for it and the source is present.
# Timestamp-based pycs are compared (leniently) with the mtime and size
# of the archived source; None is returned when they are stale.
# Raises TypeError if the payload does not unmarshal to a code object;
# whatever the _bootstrap_external validators raise is propagated.
def _unmarshal_code(self, pathname, fullpath, fullname, data):
    exc_details = {'name': fullname, 'path': fullpath}
    flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)

    if flags & 0b1:
        # Hash-based pyc; bit 1 of the flags requests a source check.
        policy = _imp.check_hash_based_pycs
        check_source = bool(flags & 0b10)
        if policy != 'never' and (check_source or policy == 'always'):
            source_bytes = _get_pyc_source(self, fullpath)
            if source_bytes is not None:
                source_hash = _imp.source_hash(
                    _bootstrap_external._RAW_MAGIC_NUMBER, source_bytes)
                _bootstrap_external._validate_hash_pyc(
                    data, source_hash, fullname, exc_details)
    else:
        source_mtime, source_size = _get_mtime_and_size_of_source(self, fullpath)
        if source_mtime:
            # We don't use _bootstrap_external._validate_timestamp_pyc
            # to allow for a more lenient timestamp check.
            stale = (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime)
                     or _unpack_uint32(data[12:16]) != source_size)
            if stale:
                _bootstrap._verbose_message(
                    f'bytecode is stale for {fullname!r}')
                return None

    # The marshalled code object follows the 16-byte pyc header.
    code = marshal.loads(data[16:])
    if isinstance(code, _code_type):
        return code
    raise TypeError(f'compiled module {pathname!r} is not a code object')


# The type of code objects, taken from a function defined in this module.
_code_type = type(_unmarshal_code.__code__)
670
671
# Convert DOS (b'\r\n') and old Mac (b'\r') line endings in a bytes
# object to Unix (b'\n') line endings.
def _normalize_line_endings(source):
    # CRLF pairs must be collapsed first so they do not become two newlines.
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
678
# Compile Python source read from the archive and return the code
# object; 'pathname' is recorded as the file name of the code.  Line
# endings are normalized first.
def _compile_source(pathname, source):
    normalized = _normalize_line_endings(source)
    return compile(normalized, pathname, 'exec', dont_inherit=True)
684
# Convert the DOS date/time values found in the Zip archive to a value
# that's compatible with the time stamp stored in .pyc files.  The
# fields are interpreted as local time by time.mktime().
def _parse_dostime(d, t):
    year = (d >> 9) + 1980      # date bits 9..15: years since 1980
    month = (d >> 5) & 0xF      # date bits 5..8
    day = d & 0x1F              # date bits 0..4
    hours = t >> 11             # time bits 11..15
    minutes = (t >> 5) & 0x3F   # time bits 5..10
    seconds = (t & 0x1F) * 2    # time bits 0..4 hold seconds / 2
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
696
# Given the archive path of a .pyc file, return a pair holding the
# modification time of the matching .py file and its uncompressed size,
# or (0, 0) if no source is available.
def _get_mtime_and_size_of_source(self, path):
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        entry = self._files[path[:-1]]
        # DOS time and date of the .py file, for comparison with the
        # time stamp embedded in the pyc, plus its decompressed size.
        dos_time, dos_date, uncompressed_size = entry[5], entry[6], entry[3]
        return _parse_dostime(dos_date, dos_time), uncompressed_size
    except (KeyError, IndexError, TypeError):
        return 0, 0
714
715
# Given the archive path of a .pyc file, return the contents of the
# matching .py file as read by _get_data(), or None if the archive
# holds no such source file.
def _get_pyc_source(self, path):
    # strip 'c' or 'o' from *.py[co]
    assert path[-1:] in ('c', 'o')
    source_path = path[:-1]

    try:
        toc_entry = self._files[source_path]
    except KeyError:
        return None
    return _get_data(self.archive, toc_entry)
730
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300731
# Get the code object associated with the module specified by
# 'fullname'.  Returns a (code, ispackage, modpath) triple, where
# modpath is the first field of the toc entry the code came from.
#
# Candidates are tried in _zip_searchorder order; a bytecode file that
# is stale or fails to validate is skipped in favour of the next
# candidate.  Raises ZipImportError when nothing usable is found,
# chained to the last bytecode ImportError if there was one.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    import_error = None
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            try:
                code = _unmarshal_code(self, modpath, fullpath, fullname, data)
            except ImportError as exc:
                # Remember the failure, but keep looking for source.
                import_error = exc
                code = None
        else:
            code = _compile_source(modpath, data)

        if code is not None:
            return code, ispackage, modpath
        # bad magic number or non-matching mtime
        # in byte code, try next

    if import_error:
        msg = f"module load failed: {import_error}"
        raise ZipImportError(msg, name=fullname) from import_error
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)