blob: f430abd6a77c59820185aa400be935e103ab0bd3 [file] [log] [blame]
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +03001"""zipimport provides support for importing Python modules from Zip archives.
2
3This module exports three objects:
4- zipimporter: a class; its constructor takes a path to a Zip archive.
5- ZipImportError: exception raised by zipimporter objects. It's a
6 subclass of ImportError, so it can be caught as ImportError, too.
7- _zip_directory_cache: a dict, mapping archive paths to zip directory
8 info dicts, as used in zipimporter._files.
9
10It is usually not needed to use the zipimport module explicitly; it is
11used by the builtin import mechanism for sys.path items that are paths
12to Zip archives.
13"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
25
26__all__ = ['ZipImportError', 'zipimporter']
27
28
# Path separators, shared with the import machinery: 'path_sep' is the primary
# separator and 'alt_path_sep' holds whatever alternative separators follow it
# in 'path_separators' (an empty string when there are none).
path_sep, alt_path_sep = (
    _bootstrap_external.path_sep,
    _bootstrap_external.path_separators[1:],
)
31
32
class ZipImportError(ImportError):
    """Exception raised by zipimporter objects.

    It is a subclass of ImportError, so it can be caught as ImportError, too.
    """
35
# Cache of _read_directory() results, keyed by archive path.
_zip_directory_cache = {}

# The type of module objects, obtained from an already imported module.
_module_type = type(sys)

# Constants used by _read_directory() to locate the "end of central
# directory" record at the tail of an archive.
END_CENTRAL_DIR_SIZE = 22              # number of bytes read for the record
STRING_END_ARCHIVE = b'PK\x05\x06'     # signature the record starts with
MAX_COMMENT_LEN = 0xFFFF               # longest trailing comment searched
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +030044
class zipimporter:
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    'ZipImportError' is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components that follow the archive file, innermost first.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. The result is always a 2-tuple:
        (self, []) if the module or package was found, (None, [path]) with
        the full path name if it's possibly a portion of a namespace package,
        or (None, []) otherwise. The optional 'path' argument is ignored --
        it's there for compatibility with the importer protocol.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname;
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []


    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.
        """
        return self.find_loader(fullname, path)[0]


    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. 'pathname' may be given
        relative to the archive or prefixed with the archive path. Raise
        OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Strip a leading "<archive><sep>" so the key is archive-relative.
        key = pathname
        archive_prefix = self.archive + path_sep
        if pathname.startswith(archive_prefix):
            key = pathname[len(archive_prefix):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            # A package: its source lives in __init__.py.
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found. If executing
        the module fails, its entry is removed from sys.modules and the
        original exception is re-raised.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Use pop() rather than del: the module's own code may already
            # have removed the entry, and a KeyError raised here would mask
            # the exception that is actually being propagated.
            sys.modules.pop(fullname, None)
            raise

        # The executed code may have replaced its own sys.modules entry;
        # return whatever is registered now.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        try:
            if not self.is_package(fullname):
                return None
        except ZipImportError:
            return None
        # Register the reader class with the ABC lazily, on first use.
        if not _ZipImportResourceReader._registered:
            from importlib.abc import ResourceReader
            ResourceReader.register(_ZipImportResourceReader)
            _ZipImportResourceReader._registered = True
        return _ZipImportResourceReader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
292
293
294# _zip_searchorder defines how we search for a module in the Zip
295# archive: we first search for a package __init__, then for
296# non-package .pyc, and .py entries. The .pyc entries
297# are swapped by initzipimport() if we run in optimized mode. Also,
298# '/' is replaced by path_sep there.
299_zip_searchorder = (
300 (path_sep + '__init__.pyc', True, True),
301 (path_sep + '__init__.py', False, True),
302 ('.pyc', True, False),
303 ('.py', False, False),
304)
305
306# Given a module name, return the potential file path in the
307# archive (without extension).
308def _get_module_path(self, fullname):
309 return self.prefix + fullname.rpartition('.')[2]
310
311# Does this path represent a directory?
312def _is_dir(self, path):
313 # See if this is a "directory". If so, it's eligible to be part
314 # of a namespace package. We test by seeing if the name, with an
315 # appended path separator, exists.
316 dirpath = path + path_sep
317 # If dirpath is present in self._files, we have a directory.
318 return dirpath in self._files
319
320# Return some information about a module.
def _get_module_info(self, fullname):
    """Tell whether 'fullname' is a package, a plain module, or absent.

    The candidates of _zip_searchorder are probed in order against
    self._files. Return True for a package, False for a plain module and
    None if no candidate file exists in the archive.
    """
    base = _get_module_path(self, fullname)
    return next(
        (is_package
         for suffix, _is_bytecode, is_package in _zip_searchorder
         if base + suffix in self._files),
        None,
    )
328
329
330# implementation
331
332# _read_directory(archive) -> files dict (new reference)
333#
334# Given a path to a Zip archive, build a dict, mapping file names
335# (local to the archive, using SEP as a separator) to toc entries.
336#
337# A toc_entry is a tuple:
338#
339# (__file__, # value to use for __file__, available for all files,
340# # encoded to the filesystem encoding
341# compress, # compression kind; 0 for uncompressed
342# data_size, # size of compressed data on disk
343# file_size, # size of decompressed data
344# file_offset, # offset of file header from start of archive
345# time, # mod time of file (in dos format)
#      date,          # mod date of file (in dos format)
347# crc, # crc checksum of the data
348# )
349#
350# Directories can be recognized by the trailing path_sep in the name,
351# data_size and file_offset are 0.
def _read_directory(archive):
    """Parse the central directory of the Zip file at path 'archive'.

    Return a dict mapping each archive-relative file name (with '/'
    replaced by path_sep) to its toc entry, the tuple
    (path, compress, data_size, file_size, file_offset, time, date, crc).

    Raise ZipImportError if the file cannot be opened or read, or is not a
    well-formed Zip archive. EOFError is raised if the central directory
    ends in the middle of an entry header.
    """
    def read_failure():
        # All low-level read problems are reported with the same error.
        return ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

    try:
        fp = _io.open(archive, 'rb')
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # First try: the end-of-central-directory record sits at the very
        # end of the file (i.e. the archive has no trailing comment).
        try:
            fp.seek(-END_CENTRAL_DIR_SIZE, 2)
            eocd_position = fp.tell()
            eocd = fp.read(END_CENTRAL_DIR_SIZE)
        except OSError:
            raise read_failure()
        if len(eocd) != END_CENTRAL_DIR_SIZE:
            raise read_failure()

        if eocd[:4] != STRING_END_ARCHIVE:
            # Bad: End of Central Dir signature
            # Check if there's a comment: search the tail of the file for
            # the last occurrence of the signature.
            try:
                fp.seek(0, 2)
                archive_size = fp.tell()
            except OSError:
                raise read_failure()
            search_start = max(
                archive_size - MAX_COMMENT_LEN - END_CENTRAL_DIR_SIZE, 0)
            try:
                fp.seek(search_start)
                tail = fp.read()
            except OSError:
                raise read_failure()
            found = tail.rfind(STRING_END_ARCHIVE)
            if found < 0:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)
            eocd = tail[found:found + END_CENTRAL_DIR_SIZE]
            if len(eocd) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                     path=archive)
            eocd_position = archive_size - len(tail) + found

        directory_size = _unpack_uint32(eocd[12:16])
        directory_offset = _unpack_uint32(eocd[16:20])
        if eocd_position < directory_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if eocd_position < directory_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        directory_start = eocd_position - directory_size
        # Difference between where the directory really starts and where the
        # record says it starts; added to every recorded file offset below.
        arc_offset = directory_start - directory_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(directory_start)
        except OSError:
            raise read_failure()
        while True:
            entry = fp.read(46)
            if len(entry) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if entry[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(entry) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(entry[8:10])
            compress = _unpack_uint16(entry[10:12])
            dos_time = _unpack_uint16(entry[12:14])
            dos_date = _unpack_uint16(entry[14:16])
            crc = _unpack_uint32(entry[16:20])
            data_size = _unpack_uint32(entry[20:24])
            file_size = _unpack_uint32(entry[24:28])
            name_size = _unpack_uint16(entry[28:30])
            extra_size = _unpack_uint16(entry[30:32])
            comment_size = _unpack_uint16(entry[32:34])
            file_offset = _unpack_uint32(entry[42:46])
            if file_offset > directory_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise read_failure()
            if len(name) != name_size:
                raise read_failure()
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            unused_size = extra_size + comment_size
            try:
                if len(fp.read(unused_size)) != unused_size:
                    raise read_failure()
            except OSError:
                raise read_failure()

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            files[name] = (path, compress, data_size, file_size, file_offset,
                           dos_time, dos_date, crc)
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
469
470# During bootstrap, we may need to load the encodings
471# package from a ZIP file. But the cp437 encoding is implemented
472# in Python in the encodings package.
473#
474# Break out of this dependency by using the translation table for
475# the cp437 encoding.
cp437_table = (
    # ASCII part (0x00-0x7f), 8 rows x 16 chars: maps to itself
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part (0x80-0xff), 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'                      # 0x80-0x87
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'                      # 0x88-0x8f
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'                      # 0x90-0x97
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'                  # 0x98-0x9f
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'                      # 0xa0-0xa7
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'                    # 0xa8-0xaf
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'      # 0xb0-0xb7
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'      # 0xb8-0xbf
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'      # 0xc0-0xc7
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'      # 0xc8-0xcf
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'      # 0xd0-0xd7
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'      # 0xd8-0xdf
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'          # 0xe0-0xe7
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'      # 0xe8-0xef
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'          # 0xf0-0xf7
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'              # 0xf8-0xff
)
504
505_importing_zlib = False
506
507# Return the zlib.decompress function object, or NULL if zlib couldn't
508# be imported. The function is cached when found, so subsequent calls
509# don't import zlib again.
510def _get_decompress_func():
511 global _importing_zlib
512 if _importing_zlib:
513 # Someone has a zlib.py[co] in their Zip file
514 # let's avoid a stack overflow.
515 _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
516 raise ZipImportError("can't decompress data; zlib not available")
517
518 _importing_zlib = True
519 try:
520 from zlib import decompress
521 except Exception:
522 _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
523 raise ZipImportError("can't decompress data; zlib not available")
524 finally:
525 _importing_zlib = False
526
527 _bootstrap._verbose_message('zipimport: zlib available')
528 return decompress
529
530# Given a path to a Zip file and a toc_entry, return the (uncompressed) data.
531def _get_data(archive, toc_entry):
532 datapath, compress, data_size, file_size, file_offset, time, date, crc = toc_entry
533 if data_size < 0:
534 raise ZipImportError('negative data size')
535
536 with _io.open(archive, 'rb') as fp:
537 # Check to make sure the local file header is correct
538 try:
539 fp.seek(file_offset)
540 except OSError:
541 raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
542 buffer = fp.read(30)
543 if len(buffer) != 30:
544 raise EOFError('EOF read where not expected')
545
546 if buffer[:4] != b'PK\x03\x04':
547 # Bad: Local File Header
548 raise ZipImportError(f'bad local file header: {archive!r}', path=archive)
549
550 name_size = _unpack_uint16(buffer[26:28])
551 extra_size = _unpack_uint16(buffer[28:30])
552 header_size = 30 + name_size + extra_size
553 file_offset += header_size # Start of file data
554 try:
555 fp.seek(file_offset)
556 except OSError:
557 raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
558 raw_data = fp.read(data_size)
559 if len(raw_data) != data_size:
560 raise OSError("zipimport: can't read data")
561
562 if compress == 0:
563 # data is not compressed
564 return raw_data
565
566 # Decompress with zlib
567 try:
568 decompress = _get_decompress_func()
569 except Exception:
570 raise ZipImportError("can't decompress data; zlib not available")
571 return decompress(raw_data, -15)
572
573
574# Lenient date/time comparison function. The precision of the mtime
575# in the archive is lower than the mtime stored in a .pyc: we
576# must allow a difference of at most one second.
577def _eq_mtime(t1, t2):
578 # dostime only stores even seconds, so be lenient
579 return abs(t1 - t2) <= 1
580
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500581
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300582# Given the contents of a .py[co] file, unmarshal the data
583# and return the code object. Return None if it the magic word doesn't
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500584# match, or if the recorded .py[co] metadata does not match the source,
585# (we do this instead of raising an exception as we fall back
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300586# to .py if available and we don't want to mask other errors).
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500587def _unmarshal_code(self, pathname, fullpath, fullname, data):
588 exc_details = {
589 'name': fullname,
590 'path': fullpath,
591 }
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300592
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500593 try:
594 flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
595 except ImportError:
596 return None
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300597
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500598 hash_based = flags & 0b1 != 0
599 if hash_based:
600 check_source = flags & 0b10 != 0
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300601 if (_imp.check_hash_based_pycs != 'never' and
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500602 (check_source or _imp.check_hash_based_pycs == 'always')):
603 source_bytes = _get_pyc_source(self, fullpath)
604 if source_bytes is not None:
605 source_hash = _imp.source_hash(
606 _bootstrap_external._RAW_MAGIC_NUMBER,
607 source_bytes,
608 )
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300609
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500610 try:
611 _boostrap_external._validate_hash_pyc(
612 data, source_hash, fullname, exc_details)
613 except ImportError:
614 return None
615 else:
616 source_mtime, source_size = \
617 _get_mtime_and_size_of_source(self, fullpath)
618
619 if source_mtime:
620 # We don't use _bootstrap_external._validate_timestamp_pyc
621 # to allow for a more lenient timestamp check.
622 if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
623 _unpack_uint32(data[12:16]) != source_size):
624 _bootstrap._verbose_message(
625 f'bytecode is stale for {fullname!r}')
626 return None
627
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300628 code = marshal.loads(data[16:])
629 if not isinstance(code, _code_type):
630 raise TypeError(f'compiled module {pathname!r} is not a code object')
631 return code
632
633_code_type = type(_unmarshal_code.__code__)
634
635
636# Replace any occurrences of '\r\n?' in the input string with '\n'.
637# This converts DOS and Mac line endings to Unix line endings.
638def _normalize_line_endings(source):
639 source = source.replace(b'\r\n', b'\n')
640 source = source.replace(b'\r', b'\n')
641 return source
642
643# Given a string buffer containing Python source code, compile it
644# and return a code object.
def _compile_source(pathname, source):
    """Compile the source bytes 'source' and return the code object.

    Line endings are normalized first; 'pathname' is the file name recorded
    in the code object. Compiler flags of the calling code are not
    inherited.
    """
    normalized = _normalize_line_endings(source)
    return compile(normalized, pathname, 'exec', dont_inherit=True)
648
649# Convert the date/time values found in the Zip archive to a value
650# that's compatible with the time stamp stored in .pyc files.
651def _parse_dostime(d, t):
652 return time.mktime((
653 (d >> 9) + 1980, # bits 9..15: year
654 (d >> 5) & 0xF, # bits 5..8: month
655 d & 0x1F, # bits 0..4: day
656 t >> 11, # bits 11..15: hours
657 (t >> 5) & 0x3F, # bits 8..10: minutes
658 (t & 0x1F) * 2, # bits 0..7: seconds / 2
659 -1, -1, -1))
660
661# Given a path to a .pyc file in the archive, return the
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500662# modification time of the matching .py file and its size,
663# or (0, 0) if no source is available.
664def _get_mtime_and_size_of_source(self, path):
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300665 try:
666 # strip 'c' or 'o' from *.py[co]
667 assert path[-1:] in ('c', 'o')
668 path = path[:-1]
669 toc_entry = self._files[path]
670 # fetch the time stamp of the .py file for comparison
671 # with an embedded pyc time stamp
672 time = toc_entry[5]
673 date = toc_entry[6]
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500674 uncompressed_size = toc_entry[3]
675 return _parse_dostime(date, time), uncompressed_size
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300676 except (KeyError, IndexError, TypeError):
Elvis Pranskevichusa6e956b2018-11-07 13:34:59 -0500677 return 0, 0
678
679
680# Given a path to a .pyc file in the archive, return the
681# contents of the matching .py file, or None if no source
682# is available.
683def _get_pyc_source(self, path):
684 # strip 'c' or 'o' from *.py[co]
685 assert path[-1:] in ('c', 'o')
686 path = path[:-1]
687
688 try:
689 toc_entry = self._files[path]
690 except KeyError:
691 return None
692 else:
693 return _get_data(self.archive, toc_entry)
694
Serhiy Storchaka79d1c2e2018-09-18 22:22:29 +0300695
696# Get the code object associated with the module specified by
697# 'fullname'.
def _get_module_code(self, fullname):
    """Return (code, ispackage, modpath) for the module 'fullname'.

    The candidates of _zip_searchorder are tried in order. A bytecode file
    that is rejected (bad magic number or stale) is skipped so that the
    next candidate is tried. 'modpath' is the path stored in the toc entry
    of the file that was used.

    Raise ZipImportError if no candidate yields a code object.
    """
    base = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        fullpath = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
        if fullpath not in self._files:
            continue
        toc_entry = self._files[fullpath]
        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            code = _unmarshal_code(self, modpath, fullpath, fullname, data)
        else:
            code = _compile_source(modpath, data)
        if code is None:
            # bad magic number or non-matching mtime
            # in byte code, try next
            continue
        return code, ispackage, modpath
    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
Serhiy Storchaka9da39612018-09-19 09:28:06 +0300722
723
724class _ZipImportResourceReader:
725 """Private class used to support ZipImport.get_resource_reader().
726
727 This class is allowed to reference all the innards and private parts of
728 the zipimporter.
729 """
730 _registered = False
731
732 def __init__(self, zipimporter, fullname):
733 self.zipimporter = zipimporter
734 self.fullname = fullname
735
736 def open_resource(self, resource):
737 fullname_as_path = self.fullname.replace('.', '/')
738 path = f'{fullname_as_path}/{resource}'
739 from io import BytesIO
740 try:
741 return BytesIO(self.zipimporter.get_data(path))
742 except OSError:
743 raise FileNotFoundError(path)
744
745 def resource_path(self, resource):
746 # All resources are in the zip file, so there is no path to the file.
747 # Raising FileNotFoundError tells the higher level API to extract the
748 # binary data and create a temporary file.
749 raise FileNotFoundError
750
751 def is_resource(self, name):
752 # Maybe we could do better, but if we can get the data, it's a
753 # resource. Otherwise it isn't.
754 fullname_as_path = self.fullname.replace('.', '/')
755 path = f'{fullname_as_path}/{name}'
756 try:
757 self.zipimporter.get_data(path)
758 except OSError:
759 return False
760 return True
761
762 def contents(self):
763 # This is a bit convoluted, because fullname will be a module path,
764 # but _files is a list of file names relative to the top of the
765 # archive's namespace. We want to compare file paths to find all the
766 # names of things inside the module represented by fullname. So we
767 # turn the module path of fullname into a file path relative to the
768 # top of the archive, and then we iterate through _files looking for
769 # names inside that "directory".
770 from pathlib import Path
771 fullname_path = Path(self.zipimporter.get_filename(self.fullname))
772 relative_path = fullname_path.relative_to(self.zipimporter.archive)
773 # Don't forget that fullname names a package, so its path will include
774 # __init__.py, which we want to ignore.
775 assert relative_path.name == '__init__.py'
776 package_path = relative_path.parent
777 subdirs_seen = set()
778 for filename in self.zipimporter._files:
779 try:
780 relative = Path(filename).relative_to(package_path)
781 except ValueError:
782 continue
783 # If the path of the file (which is relative to the top of the zip
784 # namespace), relative to the package given when the resource
785 # reader was created, has a parent, then it's a name in a
786 # subdirectory and thus we skip it.
787 parent_name = relative.parent.name
788 if len(parent_name) == 0:
789 yield relative.name
790 elif parent_name not in subdirs_seen:
791 subdirs_seen.add(parent_name)
792 yield parent_name