blob: ce3e00e24faf26d28ef19d3a25a1ba14759d36ce [file] [log] [blame]
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +03001"""zipimport provides support for importing Python modules from Zip archives.
2
3This module exports three objects:
4- zipimporter: a class; its constructor takes a path to a Zip archive.
5- ZipImportError: exception raised by zipimporter objects. It's a
6 subclass of ImportError, so it can be caught as ImportError, too.
7- _zip_directory_cache: a dict, mapping archive paths to zip directory
8 info dicts, as used in zipimporter._files.
9
10It is usually not needed to use the zipimport module explicitly; it is
11used by the builtin import mechanism for sys.path items that are paths
12to Zip archives.
13"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
Brett Cannon2de50972020-12-04 15:39:21 -080025import _warnings # For warn()
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030026
# Names exported by ``from zipimport import *``.
__all__ = ['ZipImportError', 'zipimporter']


# Primary path separator, as defined by the bootstrap import machinery.
path_sep = _bootstrap_external.path_sep
# Every entry of path_separators after the first one; may be empty.
alt_path_sep = _bootstrap_external.path_separators[1:]
32
33
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects.

    It is a subclass of ImportError, so it can be caught as ImportError, too.
    """
36
# _read_directory() cache: maps an archive path to the files dict that
# _read_directory() built for it.
_zip_directory_cache = {}

# The module type, obtained from an existing module object.
_module_type = type(sys)

# Number of bytes read from the tail of the file when looking for the
# "end of central directory" record.
END_CENTRAL_DIR_SIZE = 22
# Signature expected in the first four bytes of that record.
STRING_END_ARCHIVE = b'PK\x05\x06'
# Upper bound on the archive comment length; limits how far back from the
# end of the file the signature is searched for.
MAX_COMMENT_LEN = (1 << 16) - 1
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030045
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components that follow the archive file, innermost first.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package. Return (self, []) if we can load it, (None, [path]) with the
    # full path if it's a possible namespace portion, (None, []) if we
    # can't load it.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns a 2-tuple:
        (self, []) if the module was found, (None, [path]) where 'path' is
        the full path name if it's possibly a portion of a namespace package,
        or (None, []) otherwise. The optional 'path' argument is ignored --
        it's there for compatibility with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []


    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        return self.find_loader(fullname, path)[0]

    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Return the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be imported.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. 'pathname' may be
        either relative to the archive or prefixed with the archive path.
        Raise OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        key = pathname
        archive_prefix = self.archive + path_sep
        if pathname.startswith(archive_prefix):
            # Strip the archive path so the key is archive-relative.
            key = pathname[len(archive_prefix):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module or raise ZipImportError
        if it couldn't be imported.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it could not be imported.

        Deprecated since Python 3.10. Use exec_module() instead.
        """
        msg = ("zipimport.zipimporter.load_module() is deprecated and slated for "
               "removal in Python 3.12; use exec_module() instead")
        _warnings.warn(msg, DeprecationWarning)
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except:
            # Deliberately bare: the entry must be removed whatever was
            # raised. The executed code may already have removed it, so use
            # pop() to keep a KeyError from replacing the real exception.
            sys.modules.pop(fullname, None)
            raise

        try:
            # The executed code may have replaced its own sys.modules entry.
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        from importlib.readers import ZipReader
        return ZipReader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
326
327
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries.
# Each entry is a (suffix, isbytecode, ispackage) tuple; the suffix is
# appended to the module path to form the archive entry name.
# NOTE(review): this comment used to say that the .pyc entries are swapped
# by initzipimport() in optimized mode and that '/' is replaced by path_sep
# there; no such function is visible in this module -- confirm and drop.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
339
# Build the extension-less archive path for a module: the importer's
# prefix followed by the last dotted component of 'fullname'.
def _get_module_path(self, fullname):
    last_component = fullname.rsplit('.', 1)[-1]
    return self.prefix + last_component
344
# Tell whether 'path' names a "directory" in the archive, which makes it
# eligible to be a portion of a namespace package. A directory is
# recognized by an entry whose name is 'path' plus a trailing separator.
def _is_dir(self, path):
    return (path + path_sep) in self._files
353
# Look the module up in the archive directory using _zip_searchorder.
# Return True if it is a package, False if it is a plain module, and
# None if no matching entry exists.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    matches = (
        ispackage
        for suffix, _isbytecode, ispackage in _zip_searchorder
        if base + suffix in self._files
    )
    # The first suffix that matches wins.
    return next(matches, None)
362
363
364# implementation
365
# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using path_sep as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__, available for all files:
#                 # the archive path joined with the entry name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError when the archive cannot be opened or read or its
# central directory is inconsistent, and EOFError when the central
# directory ends in the middle of a record.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # First guess: the end-of-central-directory record is the last
        # END_CENTRAL_DIR_SIZE bytes of the file (i.e. no archive comment).
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            header_position = fp.tell()
            buffer = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Only the tail that could hold the record plus a maximum-length
            # comment is read and searched.
            max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                    END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Use the last occurrence of the signature in that tail.
            pos = data.rfind(STRING_END_ARCHIVE)
            if pos < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            # Translate the position inside 'data' to an absolute file offset.
            header_position = file_size - len(data) + pos

        # Central directory size and its recorded offset, taken from the
        # end-of-central-directory record.
        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # The central directory ends where the end record starts.
        header_position -= header_size
        # Difference between where the central directory actually starts and
        # where the archive says it starts; added to every recorded file
        # offset below.
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Fixed-size part of a central directory file header.
            buffer = fp.read(46)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break  # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            time = _unpack_uint16(buffer[12:14])
            date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            # Total length of the variable-size fields following the header.
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    # Map each byte through the cp437 translation table.
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
            files[name] = t
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
503
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# The table is a single string indexed by byte value: it is passed to
# str.translate() on a name that was decoded as latin1.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
538
# True while _get_decompress_func() is in the middle of importing zlib.
_importing_zlib = False

# Return the zlib.decompress function object, or raise ZipImportError if
# zlib cannot be imported. The _importing_zlib flag guards against
# re-entering this function while the import is still in progress.
def _get_decompress_func():
    global _importing_zlib
    failure = "can't decompress data; zlib not available"

    if not _importing_zlib:
        _importing_zlib = True
        try:
            from zlib import decompress
        except Exception:
            _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
            raise ZipImportError(failure)
        finally:
            # Always clear the guard, whether the import worked or not.
            _importing_zlib = False

        _bootstrap._verbose_message('zipimport: zlib available')
        return decompress

    # Someone has a zlib.py[co] in their Zip file
    # let's avoid a stack overflow.
    _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
    raise ZipImportError(failure)
563
# Read one archive member described by 'toc_entry' from the Zip file at
# 'archive' and return its (uncompressed) data.
#
# Raises ZipImportError for a negative data size, an unseekable offset, a
# bad local file header or when zlib is needed but unavailable; EOFError
# when the local header is truncated; OSError when the data is truncated.
def _get_data(archive, toc_entry):
    # Only three of the eight toc fields are needed here, but the entry
    # must still unpack into exactly eight values.
    _, compress, data_size, _, file_offset, _, _, _ = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open_code(archive) as stream:
        # Check to make sure the local file header is correct
        try:
            stream.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        local_header = stream.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')
        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The data starts after the fixed header and the two variable-size
        # fields (name and extra) that follow it.
        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        data_start = file_offset + 30 + name_size + extra_size
        try:
            stream.seek(data_start)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        raw_data = stream.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress != 0:
        # Decompress with zlib
        try:
            decompress = _get_decompress_func()
        except Exception:
            raise ZipImportError("can't decompress data; zlib not available")
        return decompress(raw_data, -15)

    # data is not compressed
    return raw_data
606
607
# Lenient comparison of two modification times. The mtime kept in the
# archive is less precise than the one stored in a .pyc (dostime only
# stores even seconds), so values up to one second apart count as equal.
def _eq_mtime(t1, t2):
    delta = t1 - t2
    return -1 <= delta <= 1
614
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500615
# Given the contents of a .py[co] file, unmarshal the data
# and return the code object. Raises ImportError if the magic word doesn't
# match, or if the recorded .py[co] metadata does not match the source.
# Returns None when a timestamp-based pyc is stale with respect to the
# source stored in the archive.
def _unmarshal_code(self, pathname, fullpath, fullname, data):
    exc_details = {'name': fullname, 'path': fullpath}
    flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)

    if flags & 0b1:
        # Hash-based pyc: bit 1 of the flags asks for the source to be
        # checked; the interpreter-wide policy can force or forbid it.
        policy = _imp.check_hash_based_pycs
        check_source = bool(flags & 0b10)
        if policy != 'never' and (check_source or policy == 'always'):
            source_bytes = _get_pyc_source(self, fullpath)
            if source_bytes is not None:
                source_hash = _imp.source_hash(
                    _bootstrap_external._RAW_MAGIC_NUMBER, source_bytes)
                _bootstrap_external._validate_hash_pyc(
                    data, source_hash, fullname, exc_details)
    else:
        # Timestamp-based pyc: compare against the source entry, if any.
        source_mtime, source_size = _get_mtime_and_size_of_source(self, fullpath)
        if source_mtime:
            # We don't use _bootstrap_external._validate_timestamp_pyc
            # to allow for a more lenient timestamp check.
            mtime_matches = _eq_mtime(_unpack_uint32(data[8:12]), source_mtime)
            if not mtime_matches or _unpack_uint32(data[12:16]) != source_size:
                _bootstrap._verbose_message(
                    f'bytecode is stale for {fullname!r}')
                return None

    # The marshalled code object follows the 16-byte header.
    code = marshal.loads(data[16:])
    if isinstance(code, _code_type):
        return code
    raise TypeError(f'compiled module {pathname!r} is not a code object')

# The code object type, obtained from an existing function.
_code_type = type(_unmarshal_code.__code__)
660
661
# Replace any occurrences of '\r\n?' in the input bytes with '\n'.
# This converts DOS and Mac line endings to Unix line endings.
def _normalize_line_endings(source):
    # '\r\n' pairs must be collapsed first so they do not become two '\n'.
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
668
# Compile Python source code read from the archive and return the
# resulting code object. Line endings are normalized first, and
# 'pathname' is used as the code object's filename.
def _compile_source(pathname, source):
    normalized = _normalize_line_endings(source)
    code = compile(normalized, pathname, 'exec', dont_inherit=True)
    return code
674
# Convert the date/time values found in the Zip archive to a value
# that's compatible with the time stamp stored in .pyc files.
# 'd' is the DOS date word and 't' the DOS time word.
def _parse_dostime(d, t):
    year = (d >> 9) + 1980      # bits 9..15 of d: years since 1980
    month = (d >> 5) & 0xF      # bits 5..8 of d: month
    day = d & 0x1F              # bits 0..4 of d: day
    hours = t >> 11             # bits 11..15 of t: hours
    minutes = (t >> 5) & 0x3F   # bits 5..10 of t: minutes
    seconds = (t & 0x1F) * 2    # bits 0..4 of t: seconds / 2
    # The last three fields (weekday, yearday, dst flag) are passed as -1.
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
686
# Given a path to a .pyc file in the archive, return the
# modification time of the matching .py file and its size,
# or (0, 0) if no source is available.
def _get_mtime_and_size_of_source(self, path):
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        source_entry = self._files[path[:-1]]
        # fetch the time stamp of the .py file for comparison
        # with an embedded pyc time stamp
        dos_time, dos_date, uncompressed_size = (
            source_entry[5], source_entry[6], source_entry[3])
        return _parse_dostime(dos_date, dos_time), uncompressed_size
    except (KeyError, IndexError, TypeError):
        # No source entry, or an entry that cannot be interpreted.
        return 0, 0
704
705
# Given a path to a .pyc file in the archive, return the
# contents of the matching .py file, or None if no source
# is available.
def _get_pyc_source(self, path):
    # strip 'c' or 'o' from *.py[co]
    assert path[-1:] in ('c', 'o')
    source_path = path[:-1]

    if source_path not in self._files:
        return None
    return _get_data(self.archive, self._files[source_path])
720
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300721
# Get the code object associated with the module specified by
# 'fullname'. Returns a (code, ispackage, modpath) tuple for the first
# usable entry in _zip_searchorder. Raises ZipImportError when no entry
# yields a code object; the last ImportError raised while reading
# bytecode, if any, is chained as the cause.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    import_error = None
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        if fullpath not in self._files:
            continue

        toc_entry = self._files[fullpath]
        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            try:
                code = _unmarshal_code(self, modpath, fullpath, fullname, data)
            except ImportError as exc:
                # Remember the failure, but still try the remaining suffixes.
                import_error = exc
                code = None
        else:
            code = _compile_source(modpath, data)

        if code is not None:
            return code, ispackage, modpath
        # bad magic number or non-matching mtime
        # in byte code, try next

    if import_error:
        msg = f"module load failed: {import_error}"
        raise ZipImportError(msg, name=fullname) from import_error
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)