blob: c55fec6aa1c47da4d4ae65b251e95f049a7d578c [file] [log] [blame]
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +03001"""zipimport provides support for importing Python modules from Zip archives.
2
3This module exports three objects:
4- zipimporter: a class; its constructor takes a path to a Zip archive.
5- ZipImportError: exception raised by zipimporter objects. It's a
6 subclass of ImportError, so it can be caught as ImportError, too.
7- _zip_directory_cache: a dict, mapping archive paths to zip directory
8 info dicts, as used in zipimporter._files.
9
10It is usually not needed to use the zipimport module explicitly; it is
11used by the builtin import mechanism for sys.path items that are paths
12to Zip archives.
13"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
Brett Cannon2de50972020-12-04 15:39:21 -080025import _warnings # For warn()
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030026
# Public names exported by ``from zipimport import *``.
__all__ = ['ZipImportError', 'zipimporter']


# Path separator used for names inside the archive directory, and the
# remaining characters of the import machinery's separator string (everything
# after the first one).  alt_path_sep may be empty, which is why its users
# test it for truthiness before calling str.replace() with it.
path_sep = _bootstrap_external.path_sep
alt_path_sep = _bootstrap_external.path_separators[1:]
32
33
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects.

    It is a subclass of ImportError, so it can be caught as ImportError too.
    """
36
# Cache of _read_directory() results, keyed by archive path.
_zip_directory_cache = {}

# The type of module objects (used to create and recognise modules).
_module_type = type(sys)

# Layout constants for the "end of central directory" record.
END_CENTRAL_DIR_SIZE = 22             # size of the record without a comment
STRING_END_ARCHIVE = b'PK\x05\x06'    # signature that starts the record
MAX_COMMENT_LEN = 0xFFFF              # largest trailing archive comment
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030045
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        """Locate the archive named by 'path' and load its directory.

        'path' may be str or a path-like/bytes object (decoded with
        os.fsdecode).  Trailing components that do not exist on disk are
        peeled off and become 'self.prefix'.  Raises ZipImportError when the
        path is empty or no regular file is found along it.
        """
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no component of the path exists.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        # (components were collected innermost-first, hence the reversal)
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep

    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. Returns a 2-tuple:

        - (self, []) if the module or package was found,
        - (None, [path]) if 'fullname' is a directory in the archive and
          therefore possibly a portion of a namespace package,
        - (None, []) otherwise.

        The optional 'path' argument is ignored -- it's there for
        compatibility with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        _warnings.warn("zipimporter.find_loader() is deprecated and slated for "
                       "removal in Python 3.12; use find_spec() instead",
                       DeprecationWarning)
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []

    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        _warnings.warn("zipimporter.find_module() is deprecated and slated for "
                       "removal in Python 3.12; use find_spec() instead",
                       DeprecationWarning)
        return self.find_loader(fullname, path)[0]

    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found.  The 'target' argument
        is not used.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Return the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be imported.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code

    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname', which may be either a
        name relative to the archive or a path starting with the archive
        path. Raise OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        key = pathname
        if pathname.startswith(self.archive + path_sep):
            # Strip the "<archive><sep>" part to get the in-archive name.
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)

    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module or raise ZipImportError
        if it couldn't be imported.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath

    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            # A package: its source lives in <path>/__init__.py
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()

    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi

    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it could not be imported.

        Deprecated since Python 3.10. Use exec_module() instead.
        """
        msg = ("zipimport.zipimporter.load_module() is deprecated and slated for "
               "removal in Python 3.12; use exec_module() instead")
        _warnings.warn(msg, DeprecationWarning)
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except:
            # Don't leave a half-initialised module behind.  Use pop() so a
            # module that already removed itself from sys.modules doesn't
            # turn the real error into a KeyError.
            sys.modules.pop(fullname, None)
            raise

        try:
            # The executed code may have replaced its own sys.modules entry.
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod

    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        from importlib.readers import ZipReader
        return ZipReader(self, fullname)

    def invalidate_caches(self):
        """Reload the file data of the archive path.

        If the archive can no longer be read, its cache entry is dropped and
        this importer is left with an empty directory, so later lookups
        report "not found" instead of failing on a missing mapping.
        """
        try:
            self._files = _read_directory(self.archive)
            _zip_directory_cache[self.archive] = self._files
        except ZipImportError:
            _zip_directory_cache.pop(self.archive, None)
            # Must stay a mapping: every lookup helper does
            # "name in self._files" or "self._files[name]".
            self._files = {}

    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
342
343
# _zip_searchorder lists, in lookup order, the archive-name suffixes tried
# for a module: a package __init__ first (bytecode before source), then a
# plain module (bytecode before source).  Each entry is
# (suffix, isbytecode, ispackage).
_zip_searchorder = (
    (f'{path_sep}__init__.pyc', True, True),
    (f'{path_sep}__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
355
# Map a dotted module name to its candidate path inside the archive
# (no extension): the importer's prefix plus the last name component.
def _get_module_path(self, fullname):
    _parents, _dot, leaf = fullname.rpartition('.')
    return self.prefix + leaf
360
# Tell whether 'path' names a directory in the archive.
def _is_dir(self, path):
    # A directory is recorded in the archive listing under its name plus a
    # trailing separator; finding that key makes 'path' eligible to be a
    # portion of a namespace package.
    return (path + path_sep) in self._files
369
# Report what kind of module 'fullname' is in this archive: True for a
# package, False for a plain module, None when no candidate file exists.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    # The first suffix in search order that exists in the archive decides.
    return next(
        (ispackage
         for suffix, _isbytecode, ispackage in _zip_searchorder
         if base + suffix in self._files),
        None,
    )
378
379
380# implementation
381
# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using SEP as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__, available for all files:
#                 # the archive path joined with the member name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError when the file cannot be opened/read or its end of
# central directory record is missing or inconsistent, and EOFError when the
# central directory itself is truncated.
def _read_directory(archive):
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # Fast path: assume there is no archive comment, so the end of
        # central directory record is the last END_CENTRAL_DIR_SIZE bytes.
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            header_position = fp.tell()
            buffer = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Only the tail that could hold the record plus a maximum-length
            # comment is searched.
            max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                    END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Take the last occurrence of the signature in that tail.
            pos = data.rfind(STRING_END_ARCHIVE)
            if pos < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            # Convert the position within 'data' to an absolute file offset.
            header_position = file_size - len(data) + pos

        # Size and recorded offset of the central directory, taken from the
        # end of central directory record.
        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # From here on header_position is where the central directory starts
        # in the file; arc_offset is the difference between that and the
        # recorded offset, and is added to every member offset below.
        header_position -= header_size
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Each entry starts with a fixed 46-byte header.
            buffer = fp.read(46)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            time = _unpack_uint16(buffer[12:14])
            date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            # header_size is reused here for the variable-length tail of
            # this entry (name + extra field + comment).
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.  See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    # Map each byte through the cp437 table without needing
                    # the codec itself.
                    name = name.decode('latin1').translate(cp437_table)

            # Archive names use '/'; keys use the platform separator.
            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
            files[name] = t
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
519
# During bootstrap, we may need to load the encodings
# package from a ZIP file.  But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# The table is a 256-character string indexed by byte value; it is applied
# with str.translate() to a name that was first decoded as latin-1, so each
# original byte is replaced by the character at that index.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
554
# True only while _get_decompress_func() is in the middle of importing zlib.
_importing_zlib = False


# Return the zlib.decompress function object.  Raise ZipImportError if zlib
# cannot be imported, or if this function is re-entered while its own zlib
# import is still in progress.
def _get_decompress_func():
    global _importing_zlib
    if not _importing_zlib:
        _importing_zlib = True
        try:
            from zlib import decompress as inflate
        except Exception:
            _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
            raise ZipImportError("can't decompress data; zlib not available")
        finally:
            # Always clear the guard, whether the import worked or not.
            _importing_zlib = False

        _bootstrap._verbose_message('zipimport: zlib available')
        return inflate

    # Re-entered during our own import of zlib: someone has a zlib.py[co]
    # in their Zip file, so bail out to avoid a stack overflow.
    _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
    raise ZipImportError("can't decompress data; zlib not available")
579
# Read one member out of the Zip file at 'archive', as described by
# 'toc_entry', and return its (uncompressed) bytes.
def _get_data(archive, toc_entry):
    # Only the compression kind, stored size and header offset are needed.
    _path, compress, data_size, _file_size, file_offset, _time, _date, _crc = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open_code(archive) as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if local_header[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The member data follows the fixed 30-byte header, the file name
        # and the extra field.
        name_size = _unpack_uint16(local_header[26:28])
        extra_size = _unpack_uint16(local_header[28:30])
        data_start = file_offset + 30 + name_size + extra_size
        try:
            fp.seek(data_start)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        payload = fp.read(data_size)
        if len(payload) != data_size:
            raise OSError("zipimport: can't read data")

    if compress == 0:
        # data is not compressed
        return payload

    # Decompress with zlib
    try:
        inflate = _get_decompress_func()
    except Exception:
        raise ZipImportError("can't decompress data; zlib not available")
    # A negative wbits value asks zlib for a raw deflate stream.
    return inflate(payload, -15)
622
623
# Lenient timestamp comparison.  The mtime stored in the archive is less
# precise than the one recorded in a .pyc, so two values are treated as
# equal when they differ by at most one second.
def _eq_mtime(t1, t2):
    # dostime only stores even seconds, so be lenient
    return -1 <= t1 - t2 <= 1
630
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500631
# Given the contents of a .pyc file, validate its header, unmarshal the
# payload and return the code object.  Returns None when a timestamp-based
# .pyc is stale with respect to its source in the archive.  The header and
# hash checks delegated to _bootstrap_external raise on mismatch (ImportError
# for a bad magic number); TypeError is raised if the payload is not a code
# object.
def _unmarshal_code(self, pathname, fullpath, fullname, data):
    exc_details = dict(name=fullname, path=fullpath)
    flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)

    if flags & 0b1:
        # Hash-based .pyc: validate against the source only if the pyc asks
        # for it (check_source bit) or the interpreter policy is 'always',
        # and never when the policy is 'never'.
        wants_check = (flags & 0b10) != 0
        if (_imp.check_hash_based_pycs != 'never' and
                (wants_check or _imp.check_hash_based_pycs == 'always')):
            source_bytes = _get_pyc_source(self, fullpath)
            if source_bytes is not None:
                expected_hash = _imp.source_hash(
                    _bootstrap_external._RAW_MAGIC_NUMBER,
                    source_bytes,
                )
                _bootstrap_external._validate_hash_pyc(
                    data, expected_hash, fullname, exc_details)
    else:
        # Timestamp-based .pyc: compare the recorded mtime and size with the
        # matching source entry, if there is one.
        source_mtime, source_size = _get_mtime_and_size_of_source(self, fullpath)
        if source_mtime:
            # We don't use _bootstrap_external._validate_timestamp_pyc
            # to allow for a more lenient timestamp check.
            recorded_mtime_ok = _eq_mtime(_unpack_uint32(data[8:12]), source_mtime)
            if not recorded_mtime_ok or _unpack_uint32(data[12:16]) != source_size:
                _bootstrap._verbose_message(
                    f'bytecode is stale for {fullname!r}')
                return None

    # The marshalled code follows the 16-byte header.
    code = marshal.loads(data[16:])
    if isinstance(code, _code_type):
        return code
    raise TypeError(f'compiled module {pathname!r} is not a code object')

_code_type = type(_unmarshal_code.__code__)
676
677
# Convert DOS (b'\r\n') and old Mac (b'\r') line endings in a bytes buffer
# to Unix (b'\n') line endings.
def _normalize_line_endings(source):
    # Handle b'\r\n' pairs first so they don't become two newlines.
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
684
# Compile a buffer of Python source code, after normalizing its line
# endings, and return the resulting code object.  'pathname' is recorded as
# the code's filename.
def _compile_source(pathname, source):
    unix_source = _normalize_line_endings(source)
    return compile(unix_source, pathname, 'exec', dont_inherit=True)
690
# Convert the DOS date ('d') and time ('t') words found in the Zip archive
# to a local-time timestamp comparable with the one stored in .pyc files.
def _parse_dostime(d, t):
    year = (d >> 9) + 1980      # bits 9..15: years since 1980
    month = (d >> 5) & 0xF      # bits 5..8: month
    day = d & 0x1F              # bits 0..4: day
    hours = t >> 11             # bits 11..15: hours
    minutes = (t >> 5) & 0x3F   # bits 5..10: minutes
    seconds = (t & 0x1F) * 2    # bits 0..4: seconds / 2
    # The trailing -1 fields leave weekday, yearday and DST to mktime().
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
702
# Given the archive path of a .pyc file, return (mtime, size) of the
# matching .py entry, where size is the uncompressed size, or (0, 0) if no
# such source entry is available.
def _get_mtime_and_size_of_source(self, path):
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        source_path = path[:-1]
        entry = self._files[source_path]
        # DOS time and date words of the .py file, for comparison with the
        # time stamp embedded in the pyc, plus its uncompressed size.
        dos_time, dos_date, size = entry[5], entry[6], entry[3]
        stamp = _parse_dostime(dos_date, dos_time)
    except (KeyError, IndexError, TypeError):
        return 0, 0
    return stamp, size
720
721
# Given the archive path of a .pyc file, return the raw contents of the
# matching .py entry, or None if the archive holds no such source.
def _get_pyc_source(self, path):
    # strip 'c' or 'o' from *.py[co]
    assert path[-1:] in ('c', 'o')
    source_path = path[:-1]

    try:
        toc_entry = self._files[source_path]
    except KeyError:
        return None
    return _get_data(self.archive, toc_entry)
736
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300737
# Find and load the code for the module named 'fullname'.  Returns a
# (code, ispackage, modpath) tuple for the first candidate in
# _zip_searchorder that yields a code object.  Raises ZipImportError if no
# candidate does; when a bytecode candidate failed with ImportError, that
# error is chained as the cause.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    last_failure = None
    for suffix, isbytecode, ispackage in _zip_searchorder:
        candidate = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, candidate, verbosity=2)
        try:
            toc_entry = self._files[candidate]
        except KeyError:
            # This candidate is not in the archive.
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            try:
                code = _unmarshal_code(self, modpath, candidate, fullname, data)
            except ImportError as exc:
                # Remember the failure and fall through to the next
                # candidate (typically the source file).
                last_failure = exc
                continue
        else:
            code = _compile_source(modpath, data)

        if code is not None:
            return code, ispackage, modpath
        # code is None: bad magic number or non-matching mtime
        # in byte code, try next

    if last_failure:
        msg = f"module load failed: {last_failure}"
        raise ZipImportError(msg, name=fullname) from last_failure
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)