blob: 38dc0c42907f61e77f94c89aec763f60ccc2db38 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0     /* no flag set: plain source file */
#define IS_BYTECODE 0x1     /* entry holds compiled bytecode */
#define IS_PACKAGE  0x2     /* entry is a package __init__ file */

/* One entry of the module search order: a filename suffix together
   with the IS_* flags describing a file carrying that suffix. */
struct st_zip_searchorder {
    char suffix[14];        /* suffix text, NUL-terminated */
    int type;               /* bitwise OR of IS_* flags */
};
16
Victor Stinner651f9f72013-11-12 21:44:18 +010017#ifdef ALTSEP
18_Py_IDENTIFIER(replace);
19#endif
20
Just van Rossum52e14d62002-12-30 22:08:05 +000021/* zip_searchorder defines how we search for a module in the Zip
22 archive: we first search for a package __init__, then for
23 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
24 are swapped by initzipimport() if we run in optimized mode. Also,
25 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000026static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
28 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
29 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
30 {".pyc", IS_BYTECODE},
31 {".pyo", IS_BYTECODE},
32 {".py", IS_SOURCE},
33 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000034};
35
36/* zipimporter object definition and support */
37
38typedef struct _zipimporter ZipImporter;
39
40struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000042 PyObject *archive; /* pathname of the Zip archive,
43 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000044 PyObject *prefix; /* file prefix: "a/sub/directory/",
45 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000046 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000047};
48
Just van Rossum52e14d62002-12-30 22:08:05 +000049static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000050/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000051static PyObject *zip_directory_cache = NULL;
52
53/* forward decls */
Benjamin Peterson34c15402014-02-16 14:17:28 -050054static PyObject *read_directory(PyObject *archive);
55static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040056static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000057 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000058
59
60#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
61
62
63/* zipimporter.__init__
64 Split the "subdirectory" from the Zip archive path, lookup a matching
65 entry in sys.path_importer_cache, fetch the file directory from there
66 if found, or else read it from the archive. */
67static int
68zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
69{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010070 PyObject *path, *files, *tmp;
71 PyObject *filename = NULL;
72 Py_ssize_t len, flen;
Just van Rossum52e14d62002-12-30 22:08:05 +000073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 if (!_PyArg_NoKeywords("zipimporter()", kwds))
75 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000076
Victor Stinner2b8dab72010-08-14 14:54:10 +000077 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010078 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000080
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010081 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010084 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 if (len == 0) {
86 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000087 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 }
Just van Rossum52e14d62002-12-30 22:08:05 +000089
90#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010091 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010092 if (!tmp)
93 goto error;
94 Py_DECREF(path);
95 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000096#endif
97
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010098 filename = path;
99 Py_INCREF(filename);
100 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 for (;;) {
102 struct stat statbuf;
103 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000104
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100105 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100106 if (rv == -2)
107 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 if (rv == 0) {
109 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100110 if (!S_ISREG(statbuf.st_mode))
111 /* it's a not file */
112 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 break;
114 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100115 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100117 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
118 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100120 filename = PyUnicode_Substring(path, 0, flen);
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100121 if (filename == NULL)
122 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100124 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000128
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100129 if (PyUnicode_READY(filename) < 0)
130 goto error;
131
132 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000133 if (files == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500134 files = read_directory(filename);
135 if (files == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000136 goto error;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500137 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000138 goto error;
139 }
140 else
141 Py_INCREF(files);
142 self->files = files;
143
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100144 /* Transfer reference */
145 self->archive = filename;
146 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000147
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100148 /* Check if there is a prefix directory following the filename. */
149 if (flen != len) {
150 tmp = PyUnicode_Substring(path, flen+1,
151 PyUnicode_GET_LENGTH(path));
152 if (tmp == NULL)
153 goto error;
154 self->prefix = tmp;
155 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100157 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
158 if (tmp == NULL)
159 goto error;
160 Py_DECREF(self->prefix);
161 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 }
163 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000164 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100165 self->prefix = PyUnicode_New(0, 0);
166 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000168
169error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100170 Py_DECREF(path);
171 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000172 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000173}
174
175/* GC support. */
176static int
177zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 ZipImporter *self = (ZipImporter *)obj;
180 Py_VISIT(self->files);
181 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
189 Py_XDECREF(self->prefix);
190 Py_XDECREF(self->files);
191 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
Victor Stinner028dd972010-08-17 00:04:48 +0000197 if (self->archive == NULL)
198 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200199 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000200 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000201 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 else
Victor Stinner07298a12010-10-18 22:45:54 +0000203 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000204 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000205}
206
207/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400208static PyObject *
209get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000210{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100211 Py_ssize_t len, dot;
212 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100214 len = PyUnicode_GET_LENGTH(fullname);
215 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
216 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400217 Py_INCREF(fullname);
218 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100219 } else
220 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000221}
222
223/* Given a (sub)modulename, write the potential file path in the
224 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400225 length of the resulting string.
226
227 return self.prefix + name.replace('.', os.sep) */
228static PyObject*
229make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000230{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400231 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200232 Py_UCS4 *p, *buf;
233 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000234
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200235 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200236 p = buf = PyMem_New(Py_UCS4, len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200237 if (buf == NULL) {
238 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400239 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200240 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000241
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200242 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
243 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200245 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 p += PyUnicode_GET_LENGTH(prefix);
247 len -= PyUnicode_GET_LENGTH(prefix);
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200248 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
249 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200250 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200251 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400252 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 if (*p == '.')
254 *p = SEP;
255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
257 buf, p-buf);
258 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400259 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000260}
261
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed */
    MI_NOT_FOUND,   /* no matching entry in the archive directory */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
268
Eric V. Smith984b11f2012-05-24 20:21:04 -0400269/* Does this path represent a directory?
270 on error, return < 0
271 if not a dir, return 0
272 if a dir, return 1
273*/
274static int
275check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
276{
277 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700278 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400279
280 /* See if this is a "directory". If so, it's eligible to be part
281 of a namespace package. We test by seeing if the name, with an
282 appended path separator, exists. */
283 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
284 if (dirpath == NULL)
285 return -1;
286 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700287 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400288 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700289 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400290}
291
Just van Rossum52e14d62002-12-30 22:08:05 +0000292/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000293static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400294get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000295{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400296 PyObject *subname;
297 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000299
Victor Stinner965a8a12010-10-18 21:44:33 +0000300 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400301 if (subname == NULL)
302 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000303
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400304 path = make_filename(self->prefix, subname);
305 Py_DECREF(subname);
306 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400310 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
311 if (fullpath == NULL) {
312 Py_DECREF(path);
313 return MI_ERROR;
314 }
315 item = PyDict_GetItem(self->files, fullpath);
316 Py_DECREF(fullpath);
317 if (item != NULL) {
318 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (zso->type & IS_PACKAGE)
320 return MI_PACKAGE;
321 else
322 return MI_MODULE;
323 }
324 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400325 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000327}
328
/* Result of find_loader(). */
typedef enum {
    FL_ERROR,           /* error */
    FL_NOT_FOUND,       /* no loader or namespace portion found */
    FL_MODULE_FOUND,    /* module/package found */
    FL_NS_FOUND         /* namespace portion found */
} find_loader_result;
335
Eric V. Smith984b11f2012-05-24 20:21:04 -0400336/* The guts of "find_loader" and "find_module". Return values:
337 -1: error
338 0: no loader or namespace portions found
339 1: module/package found
340 2: namespace portion found: *namespace_portion will point to the name
341*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700342static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400343find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
344{
345 enum zi_module_info mi;
346
347 *namespace_portion = NULL;
348
349 mi = get_module_info(self, fullname);
350 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700351 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400352 if (mi == MI_NOT_FOUND) {
353 /* Not a module or regular package. See if this is a directory, and
354 therefore possibly a portion of a namespace package. */
355 int is_dir = check_is_directory(self, self->prefix, fullname);
356 if (is_dir < 0)
357 return -1;
358 if (is_dir) {
359 /* This is possibly a portion of a namespace
360 package. Return the string representing its path,
361 without a trailing separator. */
362 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
363 self->archive, SEP,
364 self->prefix, fullname);
365 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700366 return FL_ERROR;
367 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400368 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700369 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400370 }
371 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700372 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400373}
374
375
Just van Rossum52e14d62002-12-30 22:08:05 +0000376/* Check whether we can satisfy the import of the module named by
377 'fullname'. Return self if we can, None if we can't. */
378static PyObject *
379zipimporter_find_module(PyObject *obj, PyObject *args)
380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ZipImporter *self = (ZipImporter *)obj;
382 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400383 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700384 PyObject *namespace_portion = NULL;
385 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700387 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
388 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000389
Eric V. Smith984b11f2012-05-24 20:21:04 -0400390 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700391 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700392 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700393 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700394 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400395 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700396 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700397 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700398 result = Py_None;
399 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700400 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700401 result = (PyObject *)self;
402 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700404 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700405 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400406}
407
408
409/* Check whether we can satisfy the import of the module named by
410 'fullname', or whether it could be a portion of a namespace
411 package. Return self if we can load it, a string containing the
412 full path if it's a possible namespace portion, None if we
413 can't load it. */
414static PyObject *
415zipimporter_find_loader(PyObject *obj, PyObject *args)
416{
417 ZipImporter *self = (ZipImporter *)obj;
418 PyObject *path = NULL;
419 PyObject *fullname;
420 PyObject *result = NULL;
421 PyObject *namespace_portion = NULL;
422
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700423 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
424 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400425
426 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700427 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700428 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700429 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700430 result = Py_BuildValue("O[]", Py_None);
431 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700432 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700433 result = Py_BuildValue("O[]", self);
434 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700435 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700436 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700437 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400438 return result;
439 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700440 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000441}
442
443/* Load and return the module named by 'fullname'. */
444static PyObject *
445zipimporter_load_module(PyObject *obj, PyObject *args)
446{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000448 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400449 PyObject *fullname;
450 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000452
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400453 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 &fullname))
455 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (PyUnicode_READY(fullname) == -1)
457 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 code = get_module_code(self, fullname, &ispackage, &modpath);
460 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000461 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000462
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400463 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000464 if (mod == NULL)
465 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 /* mod.__loader__ = self */
469 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
470 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 if (ispackage) {
473 /* add __path__ to the module *before* the code gets
474 executed */
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100475 PyObject *pkgpath, *fullpath, *subname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000477
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100478 subname = get_subname(fullname);
479 if (subname == NULL)
480 goto error;
481
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400482 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 self->archive, SEP,
484 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400485 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000486 if (fullpath == NULL)
487 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000488
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400489 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000490 if (pkgpath == NULL)
491 goto error;
492 err = PyDict_SetItemString(dict, "__path__", pkgpath);
493 Py_DECREF(pkgpath);
494 if (err != 0)
495 goto error;
496 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400497 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000498 Py_CLEAR(code);
499 if (mod == NULL)
500 goto error;
501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400503 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000504 fullname, modpath);
505 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000507error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000508 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000509 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000511}
512
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000513/* Return a string matching __file__ for the named module */
514static PyObject *
515zipimporter_get_filename(PyObject *obj, PyObject *args)
516{
517 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400518 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000519 int ispackage;
520
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400521 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000522 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000523 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000524
525 /* Deciding the filename requires working out where the code
526 would come from if the module was actually loaded */
527 code = get_module_code(self, fullname, &ispackage, &modpath);
528 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000529 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000530 Py_DECREF(code); /* Only need the path info */
531
Victor Stinner08654e12010-10-18 12:09:02 +0000532 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000533}
534
Just van Rossum52e14d62002-12-30 22:08:05 +0000535/* Return a bool signifying whether the module is a package or not. */
536static PyObject *
537zipimporter_is_package(PyObject *obj, PyObject *args)
538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400540 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000542
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400543 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000544 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 mi = get_module_info(self, fullname);
548 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000549 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400551 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000552 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 }
554 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000555}
556
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200557
Just van Rossum52e14d62002-12-30 22:08:05 +0000558static PyObject *
559zipimporter_get_data(PyObject *obj, PyObject *args)
560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100562 PyObject *path, *key;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500563 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100564 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000565
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100566 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000568
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100570 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100571 if (!path)
572 return NULL;
573#else
574 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000575#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100576 if (PyUnicode_READY(path) == -1)
577 goto error;
578
579 path_len = PyUnicode_GET_LENGTH(path);
580
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100582 path_start = 0;
583 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
584 && PyUnicode_READ_CHAR(path, len) == SEP) {
585 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000587
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100588 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000589 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100590 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000591 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000593 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
594 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100595 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000597 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100598 Py_DECREF(path);
Benjamin Peterson34c15402014-02-16 14:17:28 -0500599 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100600 error:
601 Py_DECREF(path);
602 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000603}
604
605static PyObject *
606zipimporter_get_code(PyObject *obj, PyObject *args)
607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400609 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000610
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400611 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000615}
616
617static PyObject *
618zipimporter_get_source(PyObject *obj, PyObject *args)
619{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 ZipImporter *self = (ZipImporter *)obj;
621 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400622 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000624
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400625 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000629 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000631 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400632 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000633 return NULL;
634 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400635
Victor Stinner965a8a12010-10-18 21:44:33 +0000636 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400637 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000639
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400640 path = make_filename(self->prefix, subname);
641 Py_DECREF(subname);
642 if (path == NULL)
643 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000644
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400645 if (mi == MI_PACKAGE)
646 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
647 else
648 fullpath = PyUnicode_FromFormat("%U.py", path);
649 Py_DECREF(path);
650 if (fullpath == NULL)
651 return NULL;
652
653 toc_entry = PyDict_GetItem(self->files, fullpath);
654 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000656 PyObject *res, *bytes;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500657 bytes = get_data(self->archive, toc_entry);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000658 if (bytes == NULL)
659 return NULL;
660 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
661 PyBytes_GET_SIZE(bytes));
662 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return res;
664 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 /* we have the module, but no source */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500667 Py_INCREF(Py_None);
668 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000669}
670
/* Docstrings for the zipimporter methods. Each doc_* string defined here is
   referenced by the matching entry of the zipimporter_methods table. */
671PyDoc_STRVAR(doc_find_module,
672"find_module(fullname, path=None) -> self or None.\n\
673\n\
674Search for a module specified by 'fullname'. 'fullname' must be the\n\
675fully qualified (dotted) module name. It returns the zipimporter\n\
676instance itself if the module was found, or None if it wasn't.\n\
677The optional 'path' argument is ignored -- it's there for compatibility\n\
678with the importer protocol.");
679
Eric V. Smith984b11f2012-05-24 20:21:04 -0400680PyDoc_STRVAR(doc_find_loader,
681"find_loader(fullname, path=None) -> self, str or None.\n\
682\n\
683Search for a module specified by 'fullname'. 'fullname' must be the\n\
684fully qualified (dotted) module name. It returns the zipimporter\n\
685instance itself if the module was found, a string containing the\n\
686full path name if it's possibly a portion of a namespace package,\n\
687or None otherwise. The optional 'path' argument is ignored -- it's\n\
688 there for compatibility with the importer protocol.");
689
Just van Rossum52e14d62002-12-30 22:08:05 +0000690PyDoc_STRVAR(doc_load_module,
691"load_module(fullname) -> module.\n\
692\n\
693Load the module specified by 'fullname'. 'fullname' must be the\n\
694fully qualified (dotted) module name. It returns the imported\n\
695module, or raises ZipImportError if it wasn't found.");
696
697PyDoc_STRVAR(doc_get_data,
698"get_data(pathname) -> string with file data.\n\
699\n\
700Return the data associated with 'pathname'. Raise IOError if\n\
701the file wasn't found.");
702
703PyDoc_STRVAR(doc_is_package,
704"is_package(fullname) -> bool.\n\
705\n\
706Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000707Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000708
709PyDoc_STRVAR(doc_get_code,
710"get_code(fullname) -> code object.\n\
711\n\
712Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000713if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000714
715PyDoc_STRVAR(doc_get_source,
716"get_source(fullname) -> source string.\n\
717\n\
718Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000719if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000720contain the module, but has no source for it.");
721
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000722
723PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000724"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000725\n\
726Return the filename for the specified module.");
727
/* Method table for zipimporter objects: each entry gives the Python-visible
   method name, its C implementation, the calling convention (every method
   here takes a positional argument tuple, METH_VARARGS) and its docstring.
   The all-NULL entry terminates the table. */
Just van Rossum52e14d62002-12-30 22:08:05 +0000728static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 {"find_module", zipimporter_find_module, METH_VARARGS,
730 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400731 {"find_loader", zipimporter_find_loader, METH_VARARGS,
732 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 {"load_module", zipimporter_load_module, METH_VARARGS,
734 doc_load_module},
735 {"get_data", zipimporter_get_data, METH_VARARGS,
736 doc_get_data},
737 {"get_code", zipimporter_get_code, METH_VARARGS,
738 doc_get_code},
739 {"get_source", zipimporter_get_source, METH_VARARGS,
740 doc_get_source},
741 {"get_filename", zipimporter_get_filename, METH_VARARGS,
742 doc_get_filename},
743 {"is_package", zipimporter_is_package, METH_VARARGS,
744 doc_is_package},
745 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000746};
747
/* Read-only attributes exposed on zipimporter instances. Each entry maps a
   Python attribute name to the corresponding PyObject* field of ZipImporter;
   note that the 'files' dict is exposed under the name "_files". */
748static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
750 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
751 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
752 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000753};
754
755PyDoc_STRVAR(zipimporter_doc,
756"zipimporter(archivepath) -> zipimporter object\n\
757\n\
758Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000759a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
760'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
761valid directory inside the archive.\n\
762\n\
763'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
764archive.\n\
765\n\
766The 'archive' attribute of zipimporter objects contains the name of the\n\
767zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000768
/* DEFERRED_ADDRESS(ADDR) discards its argument and expands to 0, so the
   static initializer below stores a null type pointer in the object header.
   NOTE(review): presumably the real &PyType_Type is filled in when the type
   is readied at module initialisation -- confirm against the init code. */
769#define DEFERRED_ADDRESS(ADDR) 0
770
/* Type object for zipimport.zipimporter. The flags mark it as subclassable
   (Py_TPFLAGS_BASETYPE) and as participating in cyclic GC
   (Py_TPFLAGS_HAVE_GC); accordingly it supplies tp_traverse and frees
   instances with PyObject_GC_Del. Slots are positional, so their order
   must not change. */
771static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
773 "zipimport.zipimporter",
774 sizeof(ZipImporter),
775 0, /* tp_itemsize */
776 (destructor)zipimporter_dealloc, /* tp_dealloc */
777 0, /* tp_print */
778 0, /* tp_getattr */
779 0, /* tp_setattr */
780 0, /* tp_reserved */
781 (reprfunc)zipimporter_repr, /* tp_repr */
782 0, /* tp_as_number */
783 0, /* tp_as_sequence */
784 0, /* tp_as_mapping */
785 0, /* tp_hash */
786 0, /* tp_call */
787 0, /* tp_str */
788 PyObject_GenericGetAttr, /* tp_getattro */
789 0, /* tp_setattro */
790 0, /* tp_as_buffer */
791 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
792 Py_TPFLAGS_HAVE_GC, /* tp_flags */
793 zipimporter_doc, /* tp_doc */
794 zipimporter_traverse, /* tp_traverse */
795 0, /* tp_clear */
796 0, /* tp_richcompare */
797 0, /* tp_weaklistoffset */
798 0, /* tp_iter */
799 0, /* tp_iternext */
800 zipimporter_methods, /* tp_methods */
801 zipimporter_members, /* tp_members */
802 0, /* tp_getset */
803 0, /* tp_base */
804 0, /* tp_dict */
805 0, /* tp_descr_get */
806 0, /* tp_descr_set */
807 0, /* tp_dictoffset */
808 (initproc)zipimporter_init, /* tp_init */
809 PyType_GenericAlloc, /* tp_alloc */
810 PyType_GenericNew, /* tp_new */
811 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000812};
813
814
815/* implementation */
816
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  The 32-bit quantity is interpreted
   as a signed two's-complement value, so 0xFFFFFFFF yields -1 whatever the
   width of long.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no shift ever reaches
   the sign bit (which would be undefined behaviour when long is 32 bits),
   and the sign is applied arithmetically rather than through a
   configuration-dependent #if. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* Negative value: -(2^32 - bits) == -(~bits & 0x7FFFFFFF) - 1.
           The masked operand is at most 0x7FFFFFFF, so neither the
           conversion nor the negation can overflow a 32-bit long. */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
833
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800834/*
Benjamin Peterson34c15402014-02-16 14:17:28 -0500835 read_directory(archive) -> files dict (new reference)
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800836
Benjamin Peterson34c15402014-02-16 14:17:28 -0500837 Given a path to a Zip archive, build a dict, mapping file names
Just van Rossum52e14d62002-12-30 22:08:05 +0000838 (local to the archive, using SEP as a separator) to toc entries.
839
840 A toc_entry is a tuple:
841
Victor Stinner08654e12010-10-18 12:09:02 +0000842 (__file__, # value to use for __file__, available for all files,
843 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 compress, # compression kind; 0 for uncompressed
845 data_size, # size of compressed data on disk
846 file_size, # size of decompressed data
847 file_offset, # offset of file header from start of archive
848 time, # mod time of file (in dos format)
849 date, # mod data of file (in dos format)
850 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000851 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000852
853 Directories can be recognized by the trailing SEP in the name,
854 data_size and file_offset are 0.
855*/
856static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -0500857read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 PyObject *files = NULL;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500860 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000861 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800862 short compress, time, date, name_size;
863 long crc, data_size, file_size, header_size;
864 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865 long l, count;
866 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 char name[MAXPATHLEN + 5];
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200868 char dummy[8]; /* Buffer to read unused header values into */
Victor Stinner2460a432010-08-16 17:54:28 +0000869 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800871 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100872 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000873 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000874 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000875
Benjamin Peterson34c15402014-02-16 14:17:28 -0500876 fp = _Py_fopen_obj(archive, "rb");
877 if (fp == NULL) {
Victor Stinnerfbd6f9e2015-03-20 10:52:25 +0100878 if (PyErr_ExceptionMatches(PyExc_OSError)) {
879 PyObject *exc, *val, *tb;
880 PyErr_Fetch(&exc, &val, &tb);
Benjamin Peterson34c15402014-02-16 14:17:28 -0500881 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
Victor Stinnerfbd6f9e2015-03-20 10:52:25 +0100882 _PyErr_ChainExceptions(exc, val, tb);
883 }
Benjamin Peterson34c15402014-02-16 14:17:28 -0500884 return NULL;
885 }
886
Jesus Cea09bf7a72012-10-03 02:13:05 +0200887 if (fseek(fp, -22, SEEK_END) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500888 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +0200889 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
890 return NULL;
891 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 header_position = ftell(fp);
893 if (fread(endof_central_dir, 1, 22, fp) != 22) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500894 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400895 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 return NULL;
897 }
898 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
899 /* Bad: End of Central Dir signature */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500900 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400901 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 return NULL;
903 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 header_size = get_long((unsigned char *)endof_central_dir + 12);
906 header_offset = get_long((unsigned char *)endof_central_dir + 16);
907 arc_offset = header_position - header_offset - header_size;
908 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 files = PyDict_New();
911 if (files == NULL)
912 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 /* Start of Central Directory */
915 count = 0;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200916 if (fseek(fp, header_offset, 0) == -1)
917 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 for (;;) {
919 PyObject *t;
920 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000921
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200922 /* Start of file header */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 l = PyMarshal_ReadLongFromFile(fp);
Victor Stinner73660af2013-10-29 01:43:44 +0100924 if (l == -1 && PyErr_Occurred())
925 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 if (l != 0x02014B50)
927 break; /* Bad: Central Dir File Header */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200928
929 /* On Windows, calling fseek to skip over the fields we don't use is
930 slower than reading the data into a dummy buffer because fseek flushes
931 stdio's internal buffers. See issue #8745. */
932 if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
933 goto file_error;
934
Victor Stinnerd36c8212010-10-18 12:13:46 +0000935 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 compress = PyMarshal_ReadShortFromFile(fp);
937 time = PyMarshal_ReadShortFromFile(fp);
938 date = PyMarshal_ReadShortFromFile(fp);
939 crc = PyMarshal_ReadLongFromFile(fp);
940 data_size = PyMarshal_ReadLongFromFile(fp);
941 file_size = PyMarshal_ReadLongFromFile(fp);
942 name_size = PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200943 header_size = name_size +
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 PyMarshal_ReadShortFromFile(fp) +
945 PyMarshal_ReadShortFromFile(fp);
Victor Stinner5200f552015-03-18 13:56:25 +0100946 if (PyErr_Occurred())
947 goto error;
948
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200949 if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
950 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Victor Stinner73660af2013-10-29 01:43:44 +0100952 if (PyErr_Occurred())
953 goto error;
954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 if (name_size > MAXPATHLEN)
956 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200959 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 *p = (char)getc(fp);
961 if (*p == '/')
962 *p = SEP;
963 p++;
964 }
965 *p = 0; /* Add terminating null byte */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200966 for (; i < header_size; i++) /* Skip the rest of the header */
967 if(getc(fp) == EOF) /* Avoid fseek */
968 goto file_error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000969
Victor Stinner4ee65a92011-01-22 10:30:29 +0000970 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000971 if (flags & 0x0800)
972 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000973 else if (!PyThreadState_GET()->interp->codecs_initialized) {
974 /* During bootstrap, we may need to load the encodings
975 package from a ZIP file. But the cp437 encoding is implemented
976 in Python in the encodings package.
977
978 Break out of this dependency by assuming that the path to
979 the encodings module is ASCII-only. */
980 charset = "ascii";
981 bootstrap = 1;
982 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000983 else
984 charset = "cp437";
985 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000986 if (nameobj == NULL) {
987 if (bootstrap)
988 PyErr_Format(PyExc_NotImplementedError,
989 "bootstrap issue: python%i%i.zip contains non-ASCII "
990 "filenames without the unicode flag",
991 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000992 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000993 }
Stefan Krah000fde92012-08-20 14:14:49 +0200994 if (PyUnicode_READY(nameobj) == -1)
995 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100996 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
997 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000998 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800999 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 file_size, file_offset, time, date, crc);
1001 if (t == NULL)
1002 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +00001003 err = PyDict_SetItem(files, nameobj, t);
1004 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 Py_DECREF(t);
1006 if (err != 0)
1007 goto error;
1008 count++;
1009 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001010 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001012 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
1013 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 return files;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +02001015file_error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001016 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001017 Py_XDECREF(files);
1018 Py_XDECREF(nameobj);
1019 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1020 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001021error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001022 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +00001024 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001026}
1027
1028/* Return the zlib.decompress function object, or NULL if zlib couldn't
1029 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +02001030 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001031static PyObject *
1032get_decompress_func(void)
1033{
Victor Stinner4925cde2011-05-20 00:16:09 +02001034 static int importing_zlib = 0;
1035 PyObject *zlib;
1036 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001037 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001038
Victor Stinner4925cde2011-05-20 00:16:09 +02001039 if (importing_zlib != 0)
1040 /* Someone has a zlib.py[co] in their Zip file;
1041 let's avoid a stack overflow. */
1042 return NULL;
1043 importing_zlib = 1;
1044 zlib = PyImport_ImportModuleNoBlock("zlib");
1045 importing_zlib = 0;
1046 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001047 decompress = _PyObject_GetAttrId(zlib,
1048 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001049 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001051 else {
1052 PyErr_Clear();
1053 decompress = NULL;
1054 }
1055 if (Py_VerboseFlag)
1056 PySys_WriteStderr("# zipimport: zlib %s\n",
1057 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001059}
1060
Benjamin Peterson34c15402014-02-16 14:17:28 -05001061/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
Just van Rossum52e14d62002-12-30 22:08:05 +00001062 data as a new reference. */
1063static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001064get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001065{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 PyObject *raw_data, *data = NULL, *decompress;
1067 char *buf;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001068 FILE *fp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 int err;
1070 Py_ssize_t bytes_read = 0;
1071 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001072 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 long compress, data_size, file_size, file_offset, bytes_size;
1074 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001075
Victor Stinner60fe8d92010-08-16 23:48:11 +00001076 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 &data_size, &file_size, &file_offset, &time,
1078 &date, &crc)) {
1079 return NULL;
1080 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001081
Benjamin Peterson34c15402014-02-16 14:17:28 -05001082 fp = _Py_fopen_obj(archive, "rb");
Victor Stinnere42ccd22015-03-18 01:39:23 +01001083 if (!fp)
Benjamin Peterson34c15402014-02-16 14:17:28 -05001084 return NULL;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 /* Check to make sure the local file header is correct */
Jesus Cea09bf7a72012-10-03 02:13:05 +02001087 if (fseek(fp, file_offset, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001088 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001089 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1090 return NULL;
1091 }
1092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 l = PyMarshal_ReadLongFromFile(fp);
1094 if (l != 0x04034B50) {
1095 /* Bad: Local File Header */
Victor Stinner73660af2013-10-29 01:43:44 +01001096 if (!PyErr_Occurred())
1097 PyErr_Format(ZipImportError,
1098 "bad local file header in %U",
1099 archive);
Benjamin Peterson34c15402014-02-16 14:17:28 -05001100 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 return NULL;
1102 }
Jesus Cea09bf7a72012-10-03 02:13:05 +02001103 if (fseek(fp, file_offset + 26, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001104 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001105 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1106 return NULL;
1107 }
1108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1110 PyMarshal_ReadShortFromFile(fp); /* local header size */
Victor Stinner73660af2013-10-29 01:43:44 +01001111 if (PyErr_Occurred()) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001112 fclose(fp);
Victor Stinner73660af2013-10-29 01:43:44 +01001113 return NULL;
1114 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 bytes_size = compress == 0 ? data_size : data_size + 1;
1118 if (bytes_size == 0)
1119 bytes_size++;
1120 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 if (raw_data == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001123 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 return NULL;
1125 }
1126 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 err = fseek(fp, file_offset, 0);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001129 if (err == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 bytes_read = fread(buf, 1, data_size, fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001131 } else {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001132 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001133 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1134 return NULL;
1135 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001136 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 if (err || bytes_read != data_size) {
1138 PyErr_SetString(PyExc_IOError,
1139 "zipimport: can't read data");
1140 Py_DECREF(raw_data);
1141 return NULL;
1142 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 if (compress != 0) {
1145 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1146 data_size++;
1147 }
1148 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 if (compress == 0) { /* data is not compressed */
1151 data = PyBytes_FromStringAndSize(buf, data_size);
1152 Py_DECREF(raw_data);
1153 return data;
1154 }
1155
1156 /* Decompress with zlib */
1157 decompress = get_decompress_func();
1158 if (decompress == NULL) {
1159 PyErr_SetString(ZipImportError,
1160 "can't decompress data; "
1161 "zlib not available");
1162 goto error;
1163 }
1164 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001165 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001166error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 Py_DECREF(raw_data);
1168 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001169}
1170
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 when t1 and t2 differ by at most one second, 0 otherwise.
   The difference is taken as larger-minus-smaller so the result is also
   correct on platforms where time_t is an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;

    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
1183
1184/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1185 and return the code object. Return None if it the magic word doesn't
1186 match (we do this instead of raising an exception as we fall back
1187 to .py if available and we don't want to mask other errors).
1188 Returns a new reference. */
1189static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001190unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 PyObject *code;
1193 char *buf = PyBytes_AsString(data);
1194 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (size <= 9) {
1197 PyErr_SetString(ZipImportError,
1198 "bad pyc data");
1199 return NULL;
1200 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1203 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001204 PySys_FormatStderr("# %R has bad magic\n",
1205 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 Py_INCREF(Py_None);
1207 return Py_None; /* signal caller to try alternative */
1208 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1211 mtime)) {
1212 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001213 PySys_FormatStderr("# %R has bad mtime\n",
1214 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 Py_INCREF(Py_None);
1216 return Py_None; /* signal caller to try alternative */
1217 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001218
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001219 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1220 unimportant with zip files. */
1221 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 if (code == NULL)
1223 return NULL;
1224 if (!PyCode_Check(code)) {
1225 Py_DECREF(code);
1226 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001227 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 pathname);
1229 return NULL;
1230 }
1231 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001232}
1233
1234/* Replace any occurances of "\r\n?" in the input string with "\n".
1235 This converts DOS and Mac line endings to Unix line endings.
1236 Also append a trailing "\n" to be compatible with
1237 PyParser_SimpleParseFile(). Returns a new reference. */
1238static PyObject *
1239normalize_line_endings(PyObject *source)
1240{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001241 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 PyObject *fixed_source;
1243 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001244
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001245 p = PyBytes_AsString(source);
1246 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 return PyBytes_FromStringAndSize("\n\0", 2);
1248 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 /* one char extra for trailing \n and one for terminating \0 */
1251 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1252 if (buf == NULL) {
1253 PyErr_SetString(PyExc_MemoryError,
1254 "zipimport: no memory to allocate "
1255 "source buffer");
1256 return NULL;
1257 }
1258 /* replace "\r\n?" by "\n" */
1259 for (q = buf; *p != '\0'; p++) {
1260 if (*p == '\r') {
1261 *q++ = '\n';
1262 if (*(p + 1) == '\n')
1263 p++;
1264 }
1265 else
1266 *q++ = *p;
1267 len++;
1268 }
1269 *q++ = '\n'; /* add trailing \n */
1270 *q = '\0';
1271 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1272 PyMem_Free(buf);
1273 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001274}
1275
1276/* Given a string buffer containing Python source code, compile it
Brett Cannon83358c92013-06-20 21:30:32 -04001277 and return a code object as a new reference. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001278static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001279compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001280{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001281 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001282
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001283 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1284 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001286
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001287 fixed_source = normalize_line_endings(source);
1288 if (fixed_source == NULL) {
1289 Py_DECREF(pathbytes);
1290 return NULL;
1291 }
1292
1293 code = Py_CompileString(PyBytes_AsString(fixed_source),
1294 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001296 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 Py_DECREF(fixed_source);
1298 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001299}
1300
/* Turn the packed date/time words found in a Zip archive entry into a
   time_t (via mktime(), i.e. interpreted as local time) so that it can be
   compared with the time stamp stored in .pyc files. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm parts;

    memset(&parts, 0, sizeof(parts));

    /* date word: day in bits 0-4, 1-based month in bits 5-8 and the year
       offset in bits 9-15; struct tm wants a 0-based month, and adding 80
       rebases the year offset onto tm_year's 1900 origin */
    parts.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    parts.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    parts.tm_mday = dosdate & 0x1f;

    /* time word: hours in bits 11-15, minutes in bits 5-10 and the
       seconds, stored halved, in bits 0-4 */
    parts.tm_hour = (dostime >> 11) & 0x1f;
    parts.tm_min = (dostime >> 5) & 0x3f;
    parts.tm_sec = (dostime & 0x1f) * 2;

    /* -1: let mktime() decide about DST; wday/yday is ignored */
    parts.tm_isdst = -1;

    return mktime(&parts);
}
1320
1321/* Given a path to a .pyc or .pyo file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001322 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001323 is available. */
1324static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001325get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001326{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001327 PyObject *toc_entry, *stripped;
1328 time_t mtime;
1329
1330 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001331 if (PyUnicode_READY(path) == -1)
1332 return (time_t)-1;
1333 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1334 PyUnicode_DATA(path),
1335 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001336 if (stripped == NULL)
1337 return (time_t)-1;
1338
1339 toc_entry = PyDict_GetItem(self->files, stripped);
1340 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1342 PyTuple_Size(toc_entry) == 8) {
1343 /* fetch the time stamp of the .py file for comparison
1344 with an embedded pyc time stamp */
1345 int time, date;
1346 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1347 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1348 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001349 } else
1350 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001352}
1353
1354/* Return the code object for the module named by 'fullname' from the
1355 Zip archive as a new reference. */
1356static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001357get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001359{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001360 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001361
Benjamin Peterson34c15402014-02-16 14:17:28 -05001362 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 if (data == NULL)
1364 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001365
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001366 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001367 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001368 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001369 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001370 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_DECREF(data);
1372 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001373}
1374
Ezio Melotti42da6632011-03-15 05:18:48 +02001375/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001376 'fullname'. */
1377static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001378get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001379 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001380{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001381 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001382 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001386 if (subname == NULL)
1387 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001388
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001389 path = make_filename(self->prefix, subname);
1390 Py_DECREF(subname);
1391 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001395 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001396
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001397 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1398 if (fullpath == NULL)
1399 goto exit;
1400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001402 PySys_FormatStderr("# trying %U%c%U\n",
1403 self->archive, (int)SEP, fullpath);
1404 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (toc_entry != NULL) {
1406 time_t mtime = 0;
1407 int ispackage = zso->type & IS_PACKAGE;
1408 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001409
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001410 if (isbytecode) {
1411 mtime = get_mtime_of_source(self, fullpath);
1412 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1413 goto exit;
1414 }
1415 }
1416 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (p_ispackage != NULL)
1418 *p_ispackage = ispackage;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001419 code = get_code_from_data(self, ispackage,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 isbytecode, mtime,
1421 toc_entry);
1422 if (code == Py_None) {
1423 /* bad magic number or non-matching mtime
1424 in byte code, try next */
1425 Py_DECREF(code);
1426 continue;
1427 }
Victor Stinner08654e12010-10-18 12:09:02 +00001428 if (code != NULL && p_modpath != NULL) {
1429 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1430 Py_INCREF(*p_modpath);
1431 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001432 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001434 else
1435 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001437 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1438exit:
1439 Py_DECREF(path);
1440 Py_XDECREF(fullpath);
1441 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001442}
1443
1444
1445/* Module init */
1446
1447PyDoc_STRVAR(zipimport_doc,
1448"zipimport provides support for importing Python modules from Zip archives.\n\
1449\n\
1450This module exports three objects:\n\
1451- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001452- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001453 subclass of ImportError, so it can be caught as ImportError, too.\n\
1454- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1455 info dicts, as used in zipimporter._files.\n\
1456\n\
1457It is usually not needed to use the zipimport module explicitly; it is\n\
1458used by the builtin import mechanism for sys.path items that are paths\n\
1459to Zip archives.");
1460
Martin v. Löwis1a214512008-06-11 05:26:20 +00001461static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 PyModuleDef_HEAD_INIT,
1463 "zipimport",
1464 zipimport_doc,
1465 -1,
1466 NULL,
1467 NULL,
1468 NULL,
1469 NULL,
1470 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001471};
1472
Just van Rossum52e14d62002-12-30 22:08:05 +00001473PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001474PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001475{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 if (PyType_Ready(&ZipImporter_Type) < 0)
1479 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001480
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 /* Correct directory separator */
1482 zip_searchorder[0].suffix[0] = SEP;
1483 zip_searchorder[1].suffix[0] = SEP;
1484 zip_searchorder[2].suffix[0] = SEP;
1485 if (Py_OptimizeFlag) {
1486 /* Reverse *.pyc and *.pyo */
1487 struct st_zip_searchorder tmp;
1488 tmp = zip_searchorder[0];
1489 zip_searchorder[0] = zip_searchorder[1];
1490 zip_searchorder[1] = tmp;
1491 tmp = zip_searchorder[3];
1492 zip_searchorder[3] = zip_searchorder[4];
1493 zip_searchorder[4] = tmp;
1494 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 mod = PyModule_Create(&zipimportmodule);
1497 if (mod == NULL)
1498 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1501 PyExc_ImportError, NULL);
1502 if (ZipImportError == NULL)
1503 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 Py_INCREF(ZipImportError);
1506 if (PyModule_AddObject(mod, "ZipImportError",
1507 ZipImportError) < 0)
1508 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 Py_INCREF(&ZipImporter_Type);
1511 if (PyModule_AddObject(mod, "zipimporter",
1512 (PyObject *)&ZipImporter_Type) < 0)
1513 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 zip_directory_cache = PyDict_New();
1516 if (zip_directory_cache == NULL)
1517 return NULL;
1518 Py_INCREF(zip_directory_cache);
1519 if (PyModule_AddObject(mod, "_zip_directory_cache",
1520 zip_directory_cache) < 0)
1521 return NULL;
1522 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001523}