blob: 2e5188a4a0aa54c629fb150fb88ad27cf6879486 [file] [log] [blame]
"""zipimport provides support for importing Python modules from Zip archives.

This module exports three objects:
- zipimporter: a class; its constructor takes a path to a Zip archive.
- ZipImportError: exception raised by zipimporter objects. It's a
  subclass of ImportError, so it can be caught as ImportError, too.
- _zip_directory_cache: a dict, mapping archive paths to zip directory
  info dicts, as used in zipimporter._files.

It is usually not needed to use the zipimport module explicitly; it is
used by the builtin import mechanism for sys.path items that are paths
to Zip archives.
"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
25
26__all__ = ['ZipImportError', 'zipimporter']
27
28
# Primary path separator used by the import machinery on this platform.
path_sep = _bootstrap_external.path_sep
# Any additional accepted separators (an empty string when there are none);
# these are normalized to path_sep before archive paths are examined.
alt_path_sep = _bootstrap_external.path_separators[1:]
31
32
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects.

    It is a subclass of ImportError, so it can be caught as ImportError too.
    """
35
# _read_directory() cache: maps an archive path to its files dict.
_zip_directory_cache = {}

# The module type, used to create and recognize module objects.
_module_type = type(sys)

# Size in bytes of the End of Central Directory record (without comment).
END_CENTRAL_DIR_SIZE = 22
# Signature that starts the End of Central Directory record.
STRING_END_ARCHIVE = b'PK\x05\x06'
# Longest possible archive comment (its length is a 16-bit field).
MAX_COMMENT_LEN = (1 << 16) - 1
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030044
class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, look up the
    # archive in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        """Locate the archive named by 'path' and load its directory.

        'path' may be a str or any object accepted by os.fsdecode().
        Raises ZipImportError if the path is empty or no regular file is
        found while walking up the path.
        """
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Trailing path components that live inside the archive, collected
        # innermost-first while backing up towards an existing file.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no component of the path exists.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep

    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. Returns a 2-tuple:
        (self, []) if the module was found, (None, [path]) with the full
        path name if it's possibly a portion of a namespace package, or
        (None, []) otherwise. The optional 'path' argument is ignored -- it's
        there for compatibility with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname;
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []

    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.

        Deprecated since Python 3.10. Use find_spec() instead.
        """
        return self.find_loader(fullname, path)[0]

    def find_spec(self, fullname, target=None):
        """Create a ModuleSpec for the specified module.

        Returns None if the module cannot be found. The 'target' argument
        is not used.
        """
        module_info = _get_module_info(self, fullname)
        if module_info is not None:
            return _bootstrap.spec_from_loader(fullname, self, is_package=module_info)
        else:
            # Not a module or regular package. See if this is a directory, and
            # therefore possibly a portion of a namespace package.

            # We're only interested in the last path component of fullname;
            # earlier components are recorded in self.prefix.
            modpath = _get_module_path(self, fullname)
            if _is_dir(self, modpath):
                # This is possibly a portion of a namespace
                # package. Record the string representing its path,
                # without a trailing separator.
                path = f'{self.archive}{path_sep}{modpath}'
                spec = _bootstrap.ModuleSpec(name=fullname, loader=None,
                                             is_package=True)
                spec.submodule_search_locations.append(path)
                return spec
            else:
                return None

    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code

    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. Raise OSError if
        the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Accept both archive-relative names and names that start with
        # the archive path itself.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)

    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath

    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()

    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi

    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found.

        Deprecated since Python 3.10. use exec_module() instead.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Drop the half-initialized module. Use pop() rather than del so
            # that a module which already removed itself from sys.modules
            # does not replace the real error with a KeyError.
            sys.modules.pop(fullname, None)
            raise

        try:
            # The executed code may have replaced its own sys.modules entry.
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod

    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        from importlib.readers import ZipReader
        return ZipReader(self, fullname)

    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
321
322
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries. Each entry is a
# (suffix, isbytecode, ispackage) tuple.
# NOTE(review): this comment used to say the .pyc entries are swapped by
# initzipimport() in optimized mode and that '/' is replaced by path_sep
# there; no such function exists in this file -- confirm and drop.
_zip_searchorder = (
    # (suffix appended to the module path, isbytecode, ispackage)
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
334
335# Given a module name, return the potential file path in the
336# archive (without extension).
337def _get_module_path(self, fullname):
338 return self.prefix + fullname.rpartition('.')[2]
339
340# Does this path represent a directory?
def _is_dir(self, path):
    """Return True if 'path' names a directory entry in the archive.

    A directory is recognized by its name, with a trailing path separator
    appended, being present in self._files. Such a directory is eligible
    to be a portion of a namespace package.
    """
    dirpath = path + path_sep
    # If dirpath is present in self._files, we have a directory.
    return dirpath in self._files
348
349# Return some information about a module.
def _get_module_info(self, fullname):
    """Report whether 'fullname' is available in the archive.

    Returns True if it is a package, False if it is a plain module, or
    None if no candidate from _zip_searchorder exists in self._files.
    """
    path = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = path + suffix
        if fullpath in self._files:
            return ispackage
    return None
357
358
359# implementation
360
361# _read_directory(archive) -> files dict (new reference)
362#
363# Given a path to a Zip archive, build a dict, mapping file names
364# (local to the archive, using SEP as a separator) to toc entries.
365#
366# A toc_entry is a tuple:
367#
368# (__file__, # value to use for __file__, available for all files,
369# # encoded to the filesystem encoding
370# compress, # compression kind; 0 for uncompressed
371# data_size, # size of compressed data on disk
372# file_size, # size of decompressed data
373# file_offset, # offset of file header from start of archive
374# time, # mod time of file (in dos format)
375# date, # mod data of file (in dos format)
376# crc, # crc checksum of the data
377# )
378#
379# Directories can be recognized by the trailing path_sep in the name,
380# data_size and file_offset are 0.
def _read_directory(archive):
    """Read the central directory of the Zip file at 'archive'.

    Returns a dict mapping each member name (with '/' replaced by path_sep)
    to a toc entry tuple:
    (path, compress, data_size, file_size, file_offset, time, date, crc).

    Raises ZipImportError if the file cannot be opened or read, or if its
    End of Central Directory record or offsets are invalid. Raises EOFError
    if the file ends in the middle of a central directory entry.
    """
    try:
        fp = _io.open_code(archive)
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # First assume there is no archive comment, so the End of Central
        # Directory record is the last END_CENTRAL_DIR_SIZE bytes.
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            header_position = fp.tell()
            buffer = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != END_CENTRAL_DIR_SIZE:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment.
            try:
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            # Only the tail that could hold a maximum-length comment plus
            # the record itself needs to be searched.
            max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                    END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            pos = data.rfind(STRING_END_ARCHIVE)
            if pos < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            header_position = file_size - len(data) + pos

        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        header_position -= header_size
        # Difference between where the central directory actually starts and
        # where the record says it starts; added to every member offset.
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Report a failing read the same way as every other read here.
            try:
                buffer = fp.read(46)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            # Named dos_time/dos_date so the 'time' module is not shadowed.
            dos_time = _unpack_uint16(buffer[12:14])
            dos_date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset,
                 dos_time, dos_date, crc)
            files[name] = t
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
498
499# During bootstrap, we may need to load the encodings
500# package from a ZIP file. But the cp437 encoding is implemented
501# in Python in the encodings package.
502#
503# Break out of this dependency by using the translation table for
504# the cp437 encoding.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
533
534_importing_zlib = False
535
536# Return the zlib.decompress function object, or NULL if zlib couldn't
537# be imported. The function is cached when found, so subsequent calls
538# don't import zlib again.
539def _get_decompress_func():
540 global _importing_zlib
541 if _importing_zlib:
542 # Someone has a zlib.py[co] in their Zip file
543 # let's avoid a stack overflow.
544 _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
545 raise ZipImportError("can't decompress data; zlib not available")
546
547 _importing_zlib = True
548 try:
549 from zlib import decompress
550 except Exception:
551 _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
552 raise ZipImportError("can't decompress data; zlib not available")
553 finally:
554 _importing_zlib = False
555
556 _bootstrap._verbose_message('zipimport: zlib available')
557 return decompress
558
559# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
def _get_data(archive, toc_entry):
    """Return the (uncompressed) data of one archive member.

    'archive' is the path to the Zip file and 'toc_entry' is a tuple as
    built by _read_directory(). Raises ZipImportError for a negative data
    size, a failed seek, a bad local file header or unavailable zlib;
    EOFError if the local header is truncated; OSError if fewer than
    data_size bytes could be read.
    """
    # Only compress, data_size and file_offset are needed here; the other
    # fields get underscore names so they don't shadow the 'time' module.
    (_datapath, compress, data_size, _file_size,
     file_offset, _time, _date, _crc) = toc_entry
    if data_size < 0:
        raise ZipImportError('negative data size')

    with _io.open_code(archive) as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        buffer = fp.read(30)
        if len(buffer) != 30:
            raise EOFError('EOF read where not expected')

        if buffer[:4] != b'PK\x03\x04':
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        name_size = _unpack_uint16(buffer[26:28])
        extra_size = _unpack_uint16(buffer[28:30])
        header_size = 30 + name_size + extra_size
        file_offset += header_size  # Start of file data
        try:
            fp.seek(file_offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        raw_data = fp.read(data_size)
        if len(raw_data) != data_size:
            raise OSError("zipimport: can't read data")

    if compress == 0:
        # data is not compressed
        return raw_data

    # Decompress with zlib
    try:
        decompress = _get_decompress_func()
    except Exception:
        raise ZipImportError("can't decompress data; zlib not available")
    # -15: raw deflate stream, without a zlib header or trailer.
    return decompress(raw_data, -15)
601
602
603# Lenient date/time comparison function. The precision of the mtime
604# in the archive is lower than the mtime stored in a .pyc: we
605# must allow a difference of at most one second.
606def _eq_mtime(t1, t2):
607 # dostime only stores even seconds, so be lenient
608 return abs(t1 - t2) <= 1
609
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500610
# Given the contents of a .py[co] file, unmarshal the data
# and return the code object. Return None if the magic word doesn't
# match, or if the recorded .py[co] metadata does not match the source
# (we do this instead of raising an exception as we fall back
# to .py if available and we don't want to mask other errors).
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500616def _unmarshal_code(self, pathname, fullpath, fullname, data):
617 exc_details = {
618 'name': fullname,
619 'path': fullpath,
620 }
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300621
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500622 try:
623 flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
624 except ImportError:
625 return None
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300626
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500627 hash_based = flags & 0b1 != 0
628 if hash_based:
629 check_source = flags & 0b10 != 0
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300630 if (_imp.check_hash_based_pycs != 'never' and
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500631 (check_source or _imp.check_hash_based_pycs == 'always')):
632 source_bytes = _get_pyc_source(self, fullpath)
633 if source_bytes is not None:
634 source_hash = _imp.source_hash(
635 _bootstrap_external._RAW_MAGIC_NUMBER,
636 source_bytes,
637 )
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300638
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500639 try:
Xtreak79f02fe2019-12-16 05:04:12 +0530640 _bootstrap_external._validate_hash_pyc(
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500641 data, source_hash, fullname, exc_details)
642 except ImportError:
643 return None
644 else:
645 source_mtime, source_size = \
646 _get_mtime_and_size_of_source(self, fullpath)
647
648 if source_mtime:
649 # We don't use _bootstrap_external._validate_timestamp_pyc
650 # to allow for a more lenient timestamp check.
651 if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
652 _unpack_uint32(data[12:16]) != source_size):
653 _bootstrap._verbose_message(
654 f'bytecode is stale for {fullname!r}')
655 return None
656
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300657 code = marshal.loads(data[16:])
658 if not isinstance(code, _code_type):
659 raise TypeError(f'compiled module {pathname!r} is not a code object')
660 return code
661
662_code_type = type(_unmarshal_code.__code__)
663
664
665# Replace any occurrences of '\r\n?' in the input string with '\n'.
666# This converts DOS and Mac line endings to Unix line endings.
667def _normalize_line_endings(source):
668 source = source.replace(b'\r\n', b'\n')
669 source = source.replace(b'\r', b'\n')
670 return source
671
672# Given a string buffer containing Python source code, compile it
673# and return a code object.
def _compile_source(pathname, source):
    """Compile the bytes 'source' and return the resulting code object.

    Line endings are normalized first; 'pathname' is passed to compile()
    as the file name.
    """
    source = _normalize_line_endings(source)
    return compile(source, pathname, 'exec', dont_inherit=True)
677
678# Convert the date/time values found in the Zip archive to a value
679# that's compatible with the time stamp stored in .pyc files.
680def _parse_dostime(d, t):
681 return time.mktime((
682 (d >> 9) + 1980, # bits 9..15: year
683 (d >> 5) & 0xF, # bits 5..8: month
684 d & 0x1F, # bits 0..4: day
685 t >> 11, # bits 11..15: hours
686 (t >> 5) & 0x3F, # bits 8..10: minutes
687 (t & 0x1F) * 2, # bits 0..7: seconds / 2
688 -1, -1, -1))
689
690# Given a path to a .pyc file in the archive, return the
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500691# modification time of the matching .py file and its size,
692# or (0, 0) if no source is available.
693def _get_mtime_and_size_of_source(self, path):
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300694 try:
695 # strip 'c' or 'o' from *.py[co]
696 assert path[-1:] in ('c', 'o')
697 path = path[:-1]
698 toc_entry = self._files[path]
699 # fetch the time stamp of the .py file for comparison
700 # with an embedded pyc time stamp
701 time = toc_entry[5]
702 date = toc_entry[6]
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500703 uncompressed_size = toc_entry[3]
704 return _parse_dostime(date, time), uncompressed_size
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300705 except (KeyError, IndexError, TypeError):
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500706 return 0, 0
707
708
709# Given a path to a .pyc file in the archive, return the
710# contents of the matching .py file, or None if no source
711# is available.
712def _get_pyc_source(self, path):
713 # strip 'c' or 'o' from *.py[co]
714 assert path[-1:] in ('c', 'o')
715 path = path[:-1]
716
717 try:
718 toc_entry = self._files[path]
719 except KeyError:
720 return None
721 else:
722 return _get_data(self.archive, toc_entry)
723
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300724
725# Get the code object associated with the module specified by
726# 'fullname'.
def _get_module_code(self, fullname):
    """Return (code, ispackage, modpath) for the module 'fullname'.

    Candidates are tried in _zip_searchorder order. A bytecode entry that
    _unmarshal_code() rejects (returns None for) is skipped in favour of
    the next candidate. Raises ZipImportError if no candidate yields code.
    """
    path = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = path + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            continue
        # The first toc field is the value to use for __file__.
        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            code = _unmarshal_code(self, modpath, fullpath, fullname, data)
        else:
            code = _compile_source(modpath, data)
        if code is None:
            # bad magic number or non-matching mtime
            # in byte code, try next
            continue
        return code, ispackage, modpath
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
750 raise ZipImportError(f"can't find module {fullname!r}", name=fullname)