blob: 059f124512dc2df75c7a86778aa6c810fd7c7a40 [file] [log] [blame]
"""zipimport provides support for importing Python modules from Zip archives.

This module exports three objects:
- zipimporter: a class; its constructor takes a path to a Zip archive.
- ZipImportError: exception raised by zipimporter objects. It's a
  subclass of ImportError, so it can be caught as ImportError, too.
- _zip_directory_cache: a dict, mapping archive paths to zip directory
  info dicts, as used in zipimporter._files.

It is usually not needed to use the zipimport module explicitly; it is
used by the builtin import mechanism for sys.path items that are paths
to Zip archives.
"""
14
15#from importlib import _bootstrap_external
16#from importlib import _bootstrap # for _verbose_message
17import _frozen_importlib_external as _bootstrap_external
18from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
19import _frozen_importlib as _bootstrap # for _verbose_message
20import _imp # for check_hash_based_pycs
21import _io # for open
22import marshal # for loads
23import sys # for modules
24import time # for mktime
25
# Names exported by a star import of this module.
__all__ = ['ZipImportError', 'zipimporter']


# Primary path separator and the remaining (alternative) separators, both
# taken from the frozen importlib bootstrap module.  alt_path_sep is empty
# when there is only one separator.
path_sep = _bootstrap_external.path_sep
alt_path_sep = _bootstrap_external.path_separators[1:]


# Raised by zipimporter objects; being an ImportError subclass it can also
# be caught as ImportError.
class ZipImportError(ImportError):
    pass

# _read_directory() cache: maps an archive path to the files dict that
# _read_directory() built for it.
_zip_directory_cache = {}

# The module type, obtained from an already imported module.
_module_type = type(sys)
40
41
class zipimporter:
    """zipimporter(archivepath) -> zipimporter object

    Create a new zipimporter instance. 'archivepath' must be a path to
    a zipfile, or to a specific path inside a zipfile. For example, it can be
    '/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a
    valid directory inside the archive.

    ZipImportError is raised if 'archivepath' doesn't point to a valid Zip
    archive.

    The 'archive' attribute of zipimporter objects contains the name of the
    zipfile targeted.  The 'prefix' attribute holds the path inside the
    archive (empty, or ending with a path separator).
    """

    # Split the "subdirectory" from the Zip archive path, lookup a matching
    # entry in _zip_directory_cache, fetch the file directory from there
    # if found, or else read it from the archive.
    def __init__(self, path):
        if not isinstance(path, str):
            import os
            path = os.fsdecode(path)
        if not path:
            raise ZipImportError('archive path is empty', path=path)
        if alt_path_sep:
            path = path.replace(alt_path_sep, path_sep)

        # Path components found *after* the archive file, innermost first.
        prefix = []
        while True:
            try:
                st = _bootstrap_external._path_stat(path)
            except (OSError, ValueError):
                # On Windows a ValueError is raised for too long paths.
                # Back up one path element.
                dirname, basename = _bootstrap_external._path_split(path)
                if dirname == path:
                    # Nothing left to strip: no existing file was found.
                    raise ZipImportError('not a Zip file', path=path)
                path = dirname
                prefix.append(basename)
            else:
                # it exists
                if (st.st_mode & 0o170000) != 0o100000:  # stat.S_ISREG
                    # it's not a regular file
                    raise ZipImportError('not a Zip file', path=path)
                break

        try:
            files = _zip_directory_cache[path]
        except KeyError:
            files = _read_directory(path)
            _zip_directory_cache[path] = files
        self._files = files
        self.archive = path
        # a prefix directory following the ZIP file path.
        self.prefix = _bootstrap_external._path_join(*prefix[::-1])
        if self.prefix:
            self.prefix += path_sep


    # Check whether we can satisfy the import of the module named by
    # 'fullname', or whether it could be a portion of a namespace
    # package.
    def find_loader(self, fullname, path=None):
        """find_loader(fullname, path=None) -> (loader, portions).

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. A 2-tuple is returned:

        - (self, []) if the module or package was found;
        - (None, [dirpath]) if 'fullname' matches a directory in the
          archive and is therefore possibly a portion of a namespace
          package; 'dirpath' is the full path, without a trailing separator;
        - (None, []) otherwise.

        The optional 'path' argument is ignored -- it's there for
        compatibility with the importer protocol.
        """
        mi = _get_module_info(self, fullname)
        if mi is not None:
            # This is a module or package.
            return self, []

        # Not a module or regular package. See if this is a directory, and
        # therefore possibly a portion of a namespace package.

        # We're only interested in the last path component of fullname
        # earlier components are recorded in self.prefix.
        modpath = _get_module_path(self, fullname)
        if _is_dir(self, modpath):
            # This is possibly a portion of a namespace
            # package. Return the string representing its path,
            # without a trailing separator.
            return None, [f'{self.archive}{path_sep}{modpath}']

        return None, []


    # Check whether we can satisfy the import of the module named by
    # 'fullname'. Return self if we can, None if we can't.
    def find_module(self, fullname, path=None):
        """find_module(fullname, path=None) -> self or None.

        Search for a module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the zipimporter
        instance itself if the module was found, or None if it wasn't.
        The optional 'path' argument is ignored -- it's there for compatibility
        with the importer protocol.
        """
        return self.find_loader(fullname, path)[0]


    def get_code(self, fullname):
        """get_code(fullname) -> code object.

        Return the code object for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        return code


    def get_data(self, pathname):
        """get_data(pathname) -> bytes with file data.

        Return the data associated with 'pathname'. 'pathname' may be
        relative to the archive or start with the archive path itself.
        Raise OSError if the file wasn't found.
        """
        if alt_path_sep:
            pathname = pathname.replace(alt_path_sep, path_sep)

        # Table-of-contents keys are relative to the archive root, so drop
        # a leading "<archive><sep>" if the caller supplied one.
        key = pathname
        if pathname.startswith(self.archive + path_sep):
            key = pathname[len(self.archive + path_sep):]

        try:
            toc_entry = self._files[key]
        except KeyError:
            raise OSError(0, '', key)
        return _get_data(self.archive, toc_entry)


    # Return a string matching __file__ for the named module
    def get_filename(self, fullname):
        """get_filename(fullname) -> filename string.

        Return the filename for the specified module. Raise ZipImportError
        if the module couldn't be found.
        """
        # Deciding the filename requires working out where the code
        # would come from if the module was actually loaded
        code, ispackage, modpath = _get_module_code(self, fullname)
        return modpath


    def get_source(self, fullname):
        """get_source(fullname) -> source string.

        Return the source code for the specified module. Raise ZipImportError
        if the module couldn't be found, return None if the archive does
        contain the module, but has no source for it.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)

        path = _get_module_path(self, fullname)
        if mi:
            # A package: its source lives in <path>/__init__.py
            fullpath = _bootstrap_external._path_join(path, '__init__.py')
        else:
            fullpath = f'{path}.py'

        try:
            toc_entry = self._files[fullpath]
        except KeyError:
            # we have the module, but no source
            return None
        # NOTE(review): decode() uses the default codec (UTF-8); a source
        # encoding declaration inside the file is not consulted here.
        return _get_data(self.archive, toc_entry).decode()


    # Return a bool signifying whether the module is a package or not.
    def is_package(self, fullname):
        """is_package(fullname) -> bool.

        Return True if the module specified by fullname is a package.
        Raise ZipImportError if the module couldn't be found.
        """
        mi = _get_module_info(self, fullname)
        if mi is None:
            raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
        return mi


    # Load and return the module named by 'fullname'.
    def load_module(self, fullname):
        """load_module(fullname) -> module.

        Load the module specified by 'fullname'. 'fullname' must be the
        fully qualified (dotted) module name. It returns the imported
        module, or raises ZipImportError if it wasn't found. If executing
        the module's code fails, the module is removed from sys.modules and
        the original exception is re-raised.
        """
        code, ispackage, modpath = _get_module_code(self, fullname)
        mod = sys.modules.get(fullname)
        if mod is None or not isinstance(mod, _module_type):
            mod = _module_type(fullname)
            sys.modules[fullname] = mod
        mod.__loader__ = self

        try:
            if ispackage:
                # add __path__ to the module *before* the code gets
                # executed
                path = _get_module_path(self, fullname)
                fullpath = _bootstrap_external._path_join(self.archive, path)
                mod.__path__ = [fullpath]

            if not hasattr(mod, '__builtins__'):
                mod.__builtins__ = __builtins__
            _bootstrap_external._fix_up_module(mod.__dict__, fullname, modpath)
            exec(code, mod.__dict__)
        except BaseException:
            # Drop the half-initialized module.  pop() with a default is used
            # instead of 'del' so that a module which already removed itself
            # from sys.modules cannot turn the real error into a KeyError.
            sys.modules.pop(fullname, None)
            raise

        # The executed code may have replaced its own sys.modules entry;
        # return whatever is registered now.
        try:
            mod = sys.modules[fullname]
        except KeyError:
            raise ImportError(f'Loaded module {fullname!r} not found in sys.modules')
        _bootstrap._verbose_message('import {} # loaded from Zip {}', fullname, modpath)
        return mod


    def get_resource_reader(self, fullname):
        """Return the ResourceReader for a package in a zip file.

        If 'fullname' is a package within the zip file, return the
        'ResourceReader' object for the package. Otherwise return None.
        """
        # Imported lazily, at call time rather than at module import time.
        from importlib import resources
        return resources._zipimport_get_resource_reader(self, fullname)


    def __repr__(self):
        return f'<zipimporter object "{self.archive}{path_sep}{self.prefix}">'
281
282
# _zip_searchorder defines how we search for a module in the Zip
# archive: we first search for a package __init__, then for
# non-package .pyc, and .py entries.
#
# Each entry is a (suffix, isbytecode, ispackage) tuple: the suffix that is
# appended to the module path, whether the matching file holds byte code,
# and whether a match means the module is a package.
#
# NOTE(review): an earlier version of this comment said the .pyc entries are
# swapped by initzipimport() in optimized mode and that '/' is replaced by
# path_sep there; no such function is visible in this file and the entries
# below are already built with path_sep -- confirm before relying on it.
_zip_searchorder = (
    (path_sep + '__init__.pyc', True, True),
    (path_sep + '__init__.py', False, True),
    ('.pyc', True, False),
    ('.py', False, False),
)
294
# Map a (possibly dotted) module name to its candidate location inside
# the archive, without any file extension.
def _get_module_path(self, fullname):
    # Only the last dotted component is used; the leading part of the
    # location comes from self.prefix.
    leaf = fullname.rsplit('.', 1)[-1]
    return self.prefix + leaf
299
# Report whether 'path' names a directory entry of the archive.
def _is_dir(self, path):
    # A "directory" is an entry whose name is 'path' followed by a path
    # separator; such a name makes 'path' eligible to be part of a
    # namespace package.
    return (path + path_sep) in self._files
308
# Classify the module named by 'fullname': return True for a package,
# False for a plain module, and None when no entry of the archive matches
# any suffix in _zip_searchorder.
def _get_module_info(self, fullname):
    base = _get_module_path(self, fullname)
    # The first matching suffix (in search order) decides the answer.
    return next(
        (is_pkg
         for suffix, _is_bytecode, is_pkg in _zip_searchorder
         if base + suffix in self._files),
        None,
    )
317
318
319# implementation
320
# _read_directory(archive) -> files dict (new reference)
#
# Given a path to a Zip archive, build a dict, mapping file names
# (local to the archive, using SEP as a separator) to toc entries.
#
# A toc_entry is a tuple:
#
# (__file__,      # value to use for __file__, available for all files;
#                 # the archive path joined with the entry name
#  compress,      # compression kind; 0 for uncompressed
#  data_size,     # size of compressed data on disk
#  file_size,     # size of decompressed data
#  file_offset,   # offset of file header from start of archive
#  time,          # mod time of file (in dos format)
#  date,          # mod date of file (in dos format)
#  crc,           # crc checksum of the data
# )
#
# Directories can be recognized by the trailing path_sep in the name,
# data_size and file_offset are 0.
#
# Raises ZipImportError when the archive cannot be opened or read, or when
# its end-of-central-directory record or offsets are inconsistent.
# NOTE(review): a truncated central directory entry raises EOFError, and the
# fp.read(46) call is not wrapped, so an OSError there propagates unchanged.
def _read_directory(archive):
    try:
        fp = _io.open(archive, 'rb')
    except OSError:
        raise ZipImportError(f"can't open Zip file: {archive!r}", path=archive)

    with fp:
        # The end-of-central-directory record is expected in the last 22
        # bytes of the file.
        try:
            fp.seek(-22, 2)
            header_position = fp.tell()
            buffer = fp.read(22)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if len(buffer) != 22:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        if buffer[:4] != b'PK\x05\x06':
            # Bad: End of Central Dir signature
            raise ZipImportError(f'not a Zip file: {archive!r}', path=archive)

        # Size and offset of the central directory, as recorded in the
        # end record.
        header_size = _unpack_uint32(buffer[12:16])
        header_offset = _unpack_uint32(buffer[16:20])
        if header_position < header_size:
            raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
        if header_position < header_offset:
            raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
        # header_position now becomes the actual start of the central
        # directory; arc_offset is the difference between that position and
        # the recorded offset, and is added to every local header offset.
        header_position -= header_size
        arc_offset = header_position - header_offset
        if arc_offset < 0:
            raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

        files = {}
        # Start of Central Directory
        count = 0
        try:
            fp.seek(header_position)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        while True:
            # Fixed-size part of one central directory file header.
            buffer = fp.read(46)
            if len(buffer) < 4:
                raise EOFError('EOF read where not expected')
            # Start of file header
            if buffer[:4] != b'PK\x01\x02':
                break                                # Bad: Central Dir File Header
            if len(buffer) != 46:
                raise EOFError('EOF read where not expected')
            flags = _unpack_uint16(buffer[8:10])
            compress = _unpack_uint16(buffer[10:12])
            time = _unpack_uint16(buffer[12:14])
            date = _unpack_uint16(buffer[14:16])
            crc = _unpack_uint32(buffer[16:20])
            data_size = _unpack_uint32(buffer[20:24])
            file_size = _unpack_uint32(buffer[24:28])
            name_size = _unpack_uint16(buffer[28:30])
            extra_size = _unpack_uint16(buffer[30:32])
            comment_size = _unpack_uint16(buffer[32:34])
            file_offset = _unpack_uint32(buffer[42:46])
            # From here on header_size is the length of the variable part
            # (name + extra + comment) of this entry.
            header_size = name_size + extra_size + comment_size
            if file_offset > header_offset:
                raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
            file_offset += arc_offset

            try:
                name = fp.read(name_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(name) != name_size:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            # On Windows, calling fseek to skip over the fields we don't use is
            # slower than reading the data because fseek flushes stdio's
            # internal buffers.    See issue #8745.
            try:
                if len(fp.read(header_size - name_size)) != header_size - name_size:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

            if flags & 0x800:
                # UTF-8 file names extension
                name = name.decode()
            else:
                # Historical ZIP filename encoding
                try:
                    name = name.decode('ascii')
                except UnicodeDecodeError:
                    # Map each byte through cp437_table instead of using the
                    # cp437 codec.
                    name = name.decode('latin1').translate(cp437_table)

            name = name.replace('/', path_sep)
            path = _bootstrap_external._path_join(archive, name)
            t = (path, compress, data_size, file_size, file_offset, time, date, crc)
            files[name] = t
            count += 1
    _bootstrap._verbose_message('zipimport: found {} names in {!r}', count, archive)
    return files
435
# During bootstrap, we may need to load the encodings
# package from a ZIP file. But the cp437 encoding is implemented
# in Python in the encodings package.
#
# Break out of this dependency by using the translation table for
# the cp437 encoding.
#
# The table is a single string used with str.translate() by
# _read_directory() on a name that was first decoded as latin1.
cp437_table = (
    # ASCII part, 8 rows x 16 chars
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
    '\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,-./'
    '0123456789:;<=>?'
    '@ABCDEFGHIJKLMNO'
    'PQRSTUVWXYZ[\\]^_'
    '`abcdefghijklmno'
    'pqrstuvwxyz{|}~\x7f'
    # non-ASCII part, 16 rows x 8 chars
    '\xc7\xfc\xe9\xe2\xe4\xe0\xe5\xe7'
    '\xea\xeb\xe8\xef\xee\xec\xc4\xc5'
    '\xc9\xe6\xc6\xf4\xf6\xf2\xfb\xf9'
    '\xff\xd6\xdc\xa2\xa3\xa5\u20a7\u0192'
    '\xe1\xed\xf3\xfa\xf1\xd1\xaa\xba'
    '\xbf\u2310\xac\xbd\xbc\xa1\xab\xbb'
    '\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556'
    '\u2555\u2563\u2551\u2557\u255d\u255c\u255b\u2510'
    '\u2514\u2534\u252c\u251c\u2500\u253c\u255e\u255f'
    '\u255a\u2554\u2569\u2566\u2560\u2550\u256c\u2567'
    '\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256b'
    '\u256a\u2518\u250c\u2588\u2584\u258c\u2590\u2580'
    '\u03b1\xdf\u0393\u03c0\u03a3\u03c3\xb5\u03c4'
    '\u03a6\u0398\u03a9\u03b4\u221e\u03c6\u03b5\u2229'
    '\u2261\xb1\u2265\u2264\u2320\u2321\xf7\u2248'
    '\xb0\u2219\xb7\u221a\u207f\xb2\u25a0\xa0'
)
470
# Re-entrancy guard: True only while _get_decompress_func() is in the
# middle of importing zlib.
_importing_zlib = False

# Import zlib and return its 'decompress' function.  ZipImportError is
# raised when the import fails, or when this function is re-entered while
# the import is still in progress.
def _get_decompress_func():
    global _importing_zlib
    if not _importing_zlib:
        _importing_zlib = True
        try:
            from zlib import decompress as inflate
        except Exception:
            _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
            raise ZipImportError("can't decompress data; zlib not available")
        finally:
            # Always clear the guard, whether or not the import worked.
            _importing_zlib = False

        _bootstrap._verbose_message('zipimport: zlib available')
        return inflate

    # Someone has a zlib.py[co] in their Zip file
    # let's avoid a stack overflow.
    _bootstrap._verbose_message('zipimport: zlib UNAVAILABLE')
    raise ZipImportError("can't decompress data; zlib not available")
495
# Read one archive member: given the path to a Zip file and the member's
# toc_entry, return its (uncompressed) data as bytes.
#
# Raises ZipImportError for a negative size, a failed seek, a bad local
# file header or unavailable zlib; EOFError for a truncated local header;
# OSError when fewer data bytes than expected could be read.
def _get_data(archive, toc_entry):
    (_datapath, method, stored_size, _file_size,
     offset, _time, _date, _crc) = toc_entry
    if stored_size < 0:
        raise ZipImportError('negative data size')

    with _io.open(archive, 'rb') as fp:
        # Check to make sure the local file header is correct
        try:
            fp.seek(offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        local_header = fp.read(30)
        if len(local_header) != 30:
            raise EOFError('EOF read where not expected')

        if not local_header.startswith(b'PK\x03\x04'):
            # Bad: Local File Header
            raise ZipImportError(f'bad local file header: {archive!r}', path=archive)

        # The member data follows the 30-byte fixed header, the file name
        # and the extra field.
        name_len = _unpack_uint16(local_header[26:28])
        extra_len = _unpack_uint16(local_header[28:30])
        offset += 30 + name_len + extra_len
        try:
            fp.seek(offset)
        except OSError:
            raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
        payload = fp.read(stored_size)
        if len(payload) != stored_size:
            raise OSError("zipimport: can't read data")

    if method != 0:
        # Decompress with zlib
        try:
            inflate = _get_decompress_func()
        except Exception:
            raise ZipImportError("can't decompress data; zlib not available")
        return inflate(payload, -15)

    # data is not compressed
    return payload
538
539
# Tolerant comparison of two modification times.  The mtime kept in the
# archive is less precise than the one stored in a .pyc, so two values
# are treated as equal when they are at most one second apart.
def _eq_mtime(t1, t2):
    # dostime only stores even seconds, so be lenient
    return -1 <= t1 - t2 <= 1
546
# Turn the raw contents of a .pyc file into a code object.
#
# None is returned (instead of raising) when the byte code should simply
# be skipped -- wrong magic number, stale mtime, or a hash-based pyc that
# is not accepted -- so that the caller can fall back to the .py file
# without masking other errors.  ZipImportError is raised for data shorter
# than the 16-byte header and TypeError when the payload is not a code
# object.
def _unmarshal_code(pathname, data, mtime):
    if len(data) < 16:
        raise ZipImportError('bad pyc data')

    if data[:4] != _bootstrap_external.MAGIC_NUMBER:
        _bootstrap._verbose_message('{!r} has bad magic', pathname)
        return None  # signal caller to try alternative

    flags = _unpack_uint32(data[4:8])
    if flags == 0:
        # Timestamp-based pyc: an mtime of 0 disables the check.
        if mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):
            _bootstrap._verbose_message('{!r} has bad mtime', pathname)
            return None  # signal caller to try alternative
    else:
        # Hash-based pyc. We currently refuse to handle checked hash-based
        # pycs. We could validate hash-based pycs against the source, but it
        # seems likely that most people putting hash-based pycs in a zipfile
        # will use unchecked ones.
        policy = _imp.check_hash_based_pycs
        if policy != 'never' and (flags != 0x1 or policy == 'always'):
            return None

    # XXX the pyc's size field is ignored; timestamp collisions are probably
    # unimportant with zip files.
    code = marshal.loads(data[16:])
    if isinstance(code, _code_type):
        return code
    raise TypeError(f'compiled module {pathname!r} is not a code object')

_code_type = type(_unmarshal_code.__code__)
580
581
# Convert DOS ('\r\n') and old Mac ('\r') line endings in a bytes object
# to Unix ('\n') line endings.
def _normalize_line_endings(source):
    # '\r\n' pairs must go first so that they yield a single '\n'.
    return source.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
588
# Compile a buffer of Python source code, after normalizing its line
# endings, and return the resulting code object.  'pathname' is recorded
# as the file name of the compiled code.
def _compile_source(pathname, source):
    return compile(_normalize_line_endings(source), pathname, 'exec',
                   dont_inherit=True)
594
# Convert the DOS date ('d') and DOS time ('t') words found in the Zip
# archive to a value that's compatible with the time stamp stored in
# .pyc files (local time, via time.mktime).
def _parse_dostime(d, t):
    year = (d >> 9) + 1980      # date bits 9..15: years since 1980
    month = (d >> 5) & 0xF      # date bits 5..8: month
    day = d & 0x1F              # date bits 0..4: day
    hours = t >> 11             # time bits 11..15: hours
    minutes = (t >> 5) & 0x3F   # time bits 5..10: minutes
    seconds = (t & 0x1F) * 2    # time bits 0..4: seconds / 2
    return time.mktime((year, month, day, hours, minutes, seconds, -1, -1, -1))
606
# Given the archive-relative path of a .pyc file, return the modification
# time of the .py file next to it, or 0 when there is no such source entry
# (or its table-of-contents record cannot be used).
def _get_mtime_of_source(self, path):
    try:
        # strip 'c' or 'o' from *.py[co]
        assert path[-1:] in ('c', 'o')
        entry = self._files[path[:-1]]
        # fetch the time stamp of the .py file for comparison
        # with an embedded pyc time stamp
        dos_time, dos_date = entry[5], entry[6]
        return _parse_dostime(dos_date, dos_time)
    except (KeyError, IndexError, TypeError):
        return 0
623
# Locate and load the code for the module named by 'fullname'.
#
# The candidates from _zip_searchorder are tried in order; the result is a
# (code, ispackage, modpath) tuple for the first one that yields a code
# object.  ZipImportError is raised when none does.
def _get_module_code(self, fullname):
    base = _get_module_path(self, fullname)
    for suffix, isbytecode, ispackage in _zip_searchorder:
        candidate = base + suffix
        _bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, candidate, verbosity=2)
        try:
            toc_entry = self._files[candidate]
        except KeyError:
            # No such entry in the archive; try the next suffix.
            continue

        modpath = toc_entry[0]
        data = _get_data(self.archive, toc_entry)
        if isbytecode:
            mtime = _get_mtime_of_source(self, candidate)
            code = _unmarshal_code(modpath, data, mtime)
        else:
            code = _compile_source(modpath, data)
        if code is not None:
            return code, ispackage, modpath
        # bad magic number or non-matching mtime
        # in byte code, try next

    raise ZipImportError(f"can't find module {fullname!r}", name=fullname)