blob: 8fe919539fdfffd7b1e13be5002c443cfdac4f8e [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One entry of the module search table: a filename suffix together
   with the IS_* flags describing an archive member with that suffix. */
struct st_zip_searchorder {
    char suffix[14];    /* large enough for "/__init__.pyc" plus NUL */
    int type;           /* combination of IS_* flags */
};
16
#if defined(ALTSEP)
/* Identifier of the "replace" method; it is called on path strings to
   turn every ALTSEP into SEP. */
_Py_IDENTIFIER(replace);
#endif
20
Just van Rossum52e14d62002-12-30 22:08:05 +000021/* zip_searchorder defines how we search for a module in the Zip
22 archive: we first search for a package __init__, then for
23 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
24 are swapped by initzipimport() if we run in optimized mode. Also,
25 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000026static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
28 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
29 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
30 {".pyc", IS_BYTECODE},
31 {".pyo", IS_BYTECODE},
32 {".py", IS_SOURCE},
33 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000034};
35
36/* zipimporter object definition and support */
37
38typedef struct _zipimporter ZipImporter;
39
40struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000042 PyObject *archive; /* pathname of the Zip archive,
43 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000044 PyObject *prefix; /* file prefix: "a/sub/directory/",
45 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000046 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000047};
48
Just van Rossum52e14d62002-12-30 22:08:05 +000049static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000050/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000051static PyObject *zip_directory_cache = NULL;
52
53/* forward decls */
Benjamin Peterson34c15402014-02-16 14:17:28 -050054static PyObject *read_directory(PyObject *archive);
55static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040056static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000057 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000058
59
60#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
61
62
63/* zipimporter.__init__
64 Split the "subdirectory" from the Zip archive path, lookup a matching
65 entry in sys.path_importer_cache, fetch the file directory from there
66 if found, or else read it from the archive. */
67static int
68zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
69{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010070 PyObject *path, *files, *tmp;
71 PyObject *filename = NULL;
72 Py_ssize_t len, flen;
Just van Rossum52e14d62002-12-30 22:08:05 +000073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 if (!_PyArg_NoKeywords("zipimporter()", kwds))
75 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000076
Victor Stinner2b8dab72010-08-14 14:54:10 +000077 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010078 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000080
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010081 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020082 return -1;
83
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010084 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 if (len == 0) {
86 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000087 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 }
Just van Rossum52e14d62002-12-30 22:08:05 +000089
90#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010091 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010092 if (!tmp)
93 goto error;
94 Py_DECREF(path);
95 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000096#endif
97
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010098 filename = path;
99 Py_INCREF(filename);
100 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 for (;;) {
102 struct stat statbuf;
103 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000104
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100105 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100106 if (rv == -2)
107 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 if (rv == 0) {
109 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100110 if (!S_ISREG(statbuf.st_mode))
111 /* it's a not file */
112 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 break;
114 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100115 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100117 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
118 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100120 filename = PyUnicode_Substring(path, 0, flen);
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100121 if (filename == NULL)
122 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100124 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000128
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100129 if (PyUnicode_READY(filename) < 0)
130 goto error;
131
132 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000133 if (files == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500134 files = read_directory(filename);
135 if (files == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000136 goto error;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500137 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000138 goto error;
139 }
140 else
141 Py_INCREF(files);
142 self->files = files;
143
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100144 /* Transfer reference */
145 self->archive = filename;
146 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000147
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100148 /* Check if there is a prefix directory following the filename. */
149 if (flen != len) {
150 tmp = PyUnicode_Substring(path, flen+1,
151 PyUnicode_GET_LENGTH(path));
152 if (tmp == NULL)
153 goto error;
154 self->prefix = tmp;
155 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100157 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
158 if (tmp == NULL)
159 goto error;
160 Py_DECREF(self->prefix);
161 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 }
163 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000164 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100165 self->prefix = PyUnicode_New(0, 0);
166 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000168
169error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100170 Py_DECREF(path);
171 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000172 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000173}
174
175/* GC support. */
176static int
177zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 ZipImporter *self = (ZipImporter *)obj;
180 Py_VISIT(self->files);
181 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
189 Py_XDECREF(self->prefix);
190 Py_XDECREF(self->files);
191 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
Victor Stinner028dd972010-08-17 00:04:48 +0000197 if (self->archive == NULL)
198 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200199 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000200 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000201 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 else
Victor Stinner07298a12010-10-18 22:45:54 +0000203 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000204 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000205}
206
207/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400208static PyObject *
209get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000210{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100211 Py_ssize_t len, dot;
212 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200213 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100214 len = PyUnicode_GET_LENGTH(fullname);
215 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
216 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400217 Py_INCREF(fullname);
218 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100219 } else
220 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000221}
222
223/* Given a (sub)modulename, write the potential file path in the
224 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400225 length of the resulting string.
226
227 return self.prefix + name.replace('.', os.sep) */
228static PyObject*
229make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000230{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400231 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200232 Py_UCS4 *p, *buf;
233 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000234
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200235 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
236 p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
237 if (buf == NULL) {
238 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400239 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200240 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000241
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200242 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
243 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200245 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200246 p += PyUnicode_GET_LENGTH(prefix);
247 len -= PyUnicode_GET_LENGTH(prefix);
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200248 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
249 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200250 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200251 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400252 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 if (*p == '.')
254 *p = SEP;
255 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200256 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
257 buf, p-buf);
258 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400259 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000260}
261
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* a non-package entry matched */
    MI_PACKAGE      /* an __init__ entry matched */
};
268
Eric V. Smith984b11f2012-05-24 20:21:04 -0400269/* Does this path represent a directory?
270 on error, return < 0
271 if not a dir, return 0
272 if a dir, return 1
273*/
274static int
275check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
276{
277 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700278 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400279
280 /* See if this is a "directory". If so, it's eligible to be part
281 of a namespace package. We test by seeing if the name, with an
282 appended path separator, exists. */
283 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
284 if (dirpath == NULL)
285 return -1;
286 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700287 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400288 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700289 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400290}
291
Just van Rossum52e14d62002-12-30 22:08:05 +0000292/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000293static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400294get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000295{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400296 PyObject *subname;
297 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000299
Victor Stinner965a8a12010-10-18 21:44:33 +0000300 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400301 if (subname == NULL)
302 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000303
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400304 path = make_filename(self->prefix, subname);
305 Py_DECREF(subname);
306 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400310 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
311 if (fullpath == NULL) {
312 Py_DECREF(path);
313 return MI_ERROR;
314 }
315 item = PyDict_GetItem(self->files, fullpath);
316 Py_DECREF(fullpath);
317 if (item != NULL) {
318 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (zso->type & IS_PACKAGE)
320 return MI_PACKAGE;
321 else
322 return MI_MODULE;
323 }
324 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400325 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000327}
328
/* Result of find_loader(). */
typedef enum {
    FL_ERROR,           /* lookup failed */
    FL_NOT_FOUND,       /* no loader or namespace portion found */
    FL_MODULE_FOUND,    /* module/package found */
    FL_NS_FOUND         /* namespace portion found */
} find_loader_result;
335
Eric V. Smith984b11f2012-05-24 20:21:04 -0400336/* The guts of "find_loader" and "find_module". Return values:
337 -1: error
338 0: no loader or namespace portions found
339 1: module/package found
340 2: namespace portion found: *namespace_portion will point to the name
341*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700342static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400343find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
344{
345 enum zi_module_info mi;
346
347 *namespace_portion = NULL;
348
349 mi = get_module_info(self, fullname);
350 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700351 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400352 if (mi == MI_NOT_FOUND) {
353 /* Not a module or regular package. See if this is a directory, and
354 therefore possibly a portion of a namespace package. */
355 int is_dir = check_is_directory(self, self->prefix, fullname);
356 if (is_dir < 0)
357 return -1;
358 if (is_dir) {
359 /* This is possibly a portion of a namespace
360 package. Return the string representing its path,
361 without a trailing separator. */
362 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
363 self->archive, SEP,
364 self->prefix, fullname);
365 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700366 return FL_ERROR;
367 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400368 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700369 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400370 }
371 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700372 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400373}
374
375
Just van Rossum52e14d62002-12-30 22:08:05 +0000376/* Check whether we can satisfy the import of the module named by
377 'fullname'. Return self if we can, None if we can't. */
378static PyObject *
379zipimporter_find_module(PyObject *obj, PyObject *args)
380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 ZipImporter *self = (ZipImporter *)obj;
382 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400383 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700384 PyObject *namespace_portion = NULL;
385 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700387 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
388 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000389
Eric V. Smith984b11f2012-05-24 20:21:04 -0400390 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700391 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700392 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700393 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700394 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400395 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700396 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700397 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700398 result = Py_None;
399 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700400 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700401 result = (PyObject *)self;
402 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700404 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700405 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400406}
407
408
409/* Check whether we can satisfy the import of the module named by
410 'fullname', or whether it could be a portion of a namespace
411 package. Return self if we can load it, a string containing the
412 full path if it's a possible namespace portion, None if we
413 can't load it. */
414static PyObject *
415zipimporter_find_loader(PyObject *obj, PyObject *args)
416{
417 ZipImporter *self = (ZipImporter *)obj;
418 PyObject *path = NULL;
419 PyObject *fullname;
420 PyObject *result = NULL;
421 PyObject *namespace_portion = NULL;
422
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700423 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
424 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400425
426 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700427 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700428 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700429 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700430 result = Py_BuildValue("O[]", Py_None);
431 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700432 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700433 result = Py_BuildValue("O[]", self);
434 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700435 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700436 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700437 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400438 return result;
439 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700440 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000441}
442
443/* Load and return the module named by 'fullname'. */
444static PyObject *
445zipimporter_load_module(PyObject *obj, PyObject *args)
446{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000448 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400449 PyObject *fullname;
450 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000452
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400453 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 &fullname))
455 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (PyUnicode_READY(fullname) == -1)
457 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 code = get_module_code(self, fullname, &ispackage, &modpath);
460 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000461 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000462
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400463 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000464 if (mod == NULL)
465 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 /* mod.__loader__ = self */
469 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
470 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 if (ispackage) {
473 /* add __path__ to the module *before* the code gets
474 executed */
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100475 PyObject *pkgpath, *fullpath, *subname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000477
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100478 subname = get_subname(fullname);
479 if (subname == NULL)
480 goto error;
481
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400482 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 self->archive, SEP,
484 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400485 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000486 if (fullpath == NULL)
487 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000488
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400489 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000490 if (pkgpath == NULL)
491 goto error;
492 err = PyDict_SetItemString(dict, "__path__", pkgpath);
493 Py_DECREF(pkgpath);
494 if (err != 0)
495 goto error;
496 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400497 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000498 Py_CLEAR(code);
499 if (mod == NULL)
500 goto error;
501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400503 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000504 fullname, modpath);
505 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000506 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000507error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000508 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000509 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000511}
512
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000513/* Return a string matching __file__ for the named module */
514static PyObject *
515zipimporter_get_filename(PyObject *obj, PyObject *args)
516{
517 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400518 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000519 int ispackage;
520
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400521 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000522 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000523 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000524
525 /* Deciding the filename requires working out where the code
526 would come from if the module was actually loaded */
527 code = get_module_code(self, fullname, &ispackage, &modpath);
528 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000529 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000530 Py_DECREF(code); /* Only need the path info */
531
Victor Stinner08654e12010-10-18 12:09:02 +0000532 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000533}
534
Just van Rossum52e14d62002-12-30 22:08:05 +0000535/* Return a bool signifying whether the module is a package or not. */
536static PyObject *
537zipimporter_is_package(PyObject *obj, PyObject *args)
538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400540 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000542
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400543 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000544 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000546
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 mi = get_module_info(self, fullname);
548 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000549 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400551 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000552 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 }
554 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000555}
556
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200557
Just van Rossum52e14d62002-12-30 22:08:05 +0000558static PyObject *
559zipimporter_get_data(PyObject *obj, PyObject *args)
560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100562 PyObject *path, *key;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500563 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100564 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000565
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100566 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000568
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200569#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100570 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100571 if (!path)
572 return NULL;
573#else
574 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000575#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100576 if (PyUnicode_READY(path) == -1)
577 goto error;
578
579 path_len = PyUnicode_GET_LENGTH(path);
580
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200581 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100582 path_start = 0;
583 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
584 && PyUnicode_READ_CHAR(path, len) == SEP) {
585 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000587
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100588 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000589 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100590 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000591 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000593 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
594 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100595 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000597 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100598 Py_DECREF(path);
Benjamin Peterson34c15402014-02-16 14:17:28 -0500599 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100600 error:
601 Py_DECREF(path);
602 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000603}
604
605static PyObject *
606zipimporter_get_code(PyObject *obj, PyObject *args)
607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400609 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000610
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400611 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000615}
616
617static PyObject *
618zipimporter_get_source(PyObject *obj, PyObject *args)
619{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 ZipImporter *self = (ZipImporter *)obj;
621 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400622 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000624
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400625 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000629 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000631 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400632 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000633 return NULL;
634 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400635
Victor Stinner965a8a12010-10-18 21:44:33 +0000636 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400637 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000639
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400640 path = make_filename(self->prefix, subname);
641 Py_DECREF(subname);
642 if (path == NULL)
643 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000644
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400645 if (mi == MI_PACKAGE)
646 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
647 else
648 fullpath = PyUnicode_FromFormat("%U.py", path);
649 Py_DECREF(path);
650 if (fullpath == NULL)
651 return NULL;
652
653 toc_entry = PyDict_GetItem(self->files, fullpath);
654 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000656 PyObject *res, *bytes;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500657 bytes = get_data(self->archive, toc_entry);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000658 if (bytes == NULL)
659 return NULL;
660 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
661 PyBytes_GET_SIZE(bytes));
662 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 return res;
664 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 /* we have the module, but no source */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500667 Py_INCREF(Py_None);
668 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000669}
670
671PyDoc_STRVAR(doc_find_module,
672"find_module(fullname, path=None) -> self or None.\n\
673\n\
674Search for a module specified by 'fullname'. 'fullname' must be the\n\
675fully qualified (dotted) module name. It returns the zipimporter\n\
676instance itself if the module was found, or None if it wasn't.\n\
677The optional 'path' argument is ignored -- it's there for compatibility\n\
678with the importer protocol.");
679
Eric V. Smith984b11f2012-05-24 20:21:04 -0400680PyDoc_STRVAR(doc_find_loader,
681"find_loader(fullname, path=None) -> self, str or None.\n\
682\n\
683Search for a module specified by 'fullname'. 'fullname' must be the\n\
684fully qualified (dotted) module name. It returns the zipimporter\n\
685instance itself if the module was found, a string containing the\n\
686full path name if it's possibly a portion of a namespace package,\n\
687or None otherwise. The optional 'path' argument is ignored -- it's\n\
688 there for compatibility with the importer protocol.");
689
Just van Rossum52e14d62002-12-30 22:08:05 +0000690PyDoc_STRVAR(doc_load_module,
691"load_module(fullname) -> module.\n\
692\n\
693Load the module specified by 'fullname'. 'fullname' must be the\n\
694fully qualified (dotted) module name. It returns the imported\n\
695module, or raises ZipImportError if it wasn't found.");
696
697PyDoc_STRVAR(doc_get_data,
698"get_data(pathname) -> string with file data.\n\
699\n\
700Return the data associated with 'pathname'. Raise IOError if\n\
701the file wasn't found.");
702
703PyDoc_STRVAR(doc_is_package,
704"is_package(fullname) -> bool.\n\
705\n\
706Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000707Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000708
709PyDoc_STRVAR(doc_get_code,
710"get_code(fullname) -> code object.\n\
711\n\
712Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000713if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000714
715PyDoc_STRVAR(doc_get_source,
716"get_source(fullname) -> source string.\n\
717\n\
718Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000719if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000720contain the module, but has no source for it.");
721
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000722
723PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000724"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000725\n\
726Return the filename for the specified module.");
727
Just van Rossum52e14d62002-12-30 22:08:05 +0000728static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 {"find_module", zipimporter_find_module, METH_VARARGS,
730 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400731 {"find_loader", zipimporter_find_loader, METH_VARARGS,
732 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 {"load_module", zipimporter_load_module, METH_VARARGS,
734 doc_load_module},
735 {"get_data", zipimporter_get_data, METH_VARARGS,
736 doc_get_data},
737 {"get_code", zipimporter_get_code, METH_VARARGS,
738 doc_get_code},
739 {"get_source", zipimporter_get_source, METH_VARARGS,
740 doc_get_source},
741 {"get_filename", zipimporter_get_filename, METH_VARARGS,
742 doc_get_filename},
743 {"is_package", zipimporter_is_package, METH_VARARGS,
744 doc_is_package},
745 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000746};
747
748static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
750 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
751 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
752 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000753};
754
755PyDoc_STRVAR(zipimporter_doc,
756"zipimporter(archivepath) -> zipimporter object\n\
757\n\
758Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000759a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
760'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
761valid directory inside the archive.\n\
762\n\
763'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
764archive.\n\
765\n\
766The 'archive' attribute of zipimporter objects contains the name of the\n\
767zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000768
769#define DEFERRED_ADDRESS(ADDR) 0
770
771static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
773 "zipimport.zipimporter",
774 sizeof(ZipImporter),
775 0, /* tp_itemsize */
776 (destructor)zipimporter_dealloc, /* tp_dealloc */
777 0, /* tp_print */
778 0, /* tp_getattr */
779 0, /* tp_setattr */
780 0, /* tp_reserved */
781 (reprfunc)zipimporter_repr, /* tp_repr */
782 0, /* tp_as_number */
783 0, /* tp_as_sequence */
784 0, /* tp_as_mapping */
785 0, /* tp_hash */
786 0, /* tp_call */
787 0, /* tp_str */
788 PyObject_GenericGetAttr, /* tp_getattro */
789 0, /* tp_setattro */
790 0, /* tp_as_buffer */
791 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
792 Py_TPFLAGS_HAVE_GC, /* tp_flags */
793 zipimporter_doc, /* tp_doc */
794 zipimporter_traverse, /* tp_traverse */
795 0, /* tp_clear */
796 0, /* tp_richcompare */
797 0, /* tp_weaklistoffset */
798 0, /* tp_iter */
799 0, /* tp_iternext */
800 zipimporter_methods, /* tp_methods */
801 zipimporter_members, /* tp_members */
802 0, /* tp_getset */
803 0, /* tp_base */
804 0, /* tp_dict */
805 0, /* tp_descr_get */
806 0, /* tp_descr_set */
807 0, /* tp_dictoffset */
808 (initproc)zipimporter_init, /* tp_init */
809 PyType_GenericAlloc, /* tp_alloc */
810 PyType_GenericNew, /* tp_new */
811 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000812};
813
814
815/* implementation */
816
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  The 32-bit value is interpreted as
   signed (two's complement), so bit 31 yields a negative result whatever
   the width of long.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned type so that no shift ever
   reaches the sign bit of a signed long. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;
    long x;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    /* Low 31 bits always fit in a long. */
    x = (long)(bits & 0x7FFFFFFFUL);
    if (bits & 0x80000000UL) {
        /* Sign extension: subtract 2**31 in two steps so the constant
           itself never exceeds a 32-bit long. */
        x = x - 0x7FFFFFFFL - 1;
    }
    return x;
}
833
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800834/*
Benjamin Peterson34c15402014-02-16 14:17:28 -0500835 read_directory(archive) -> files dict (new reference)
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800836
Benjamin Peterson34c15402014-02-16 14:17:28 -0500837 Given a path to a Zip archive, build a dict, mapping file names
Just van Rossum52e14d62002-12-30 22:08:05 +0000838 (local to the archive, using SEP as a separator) to toc entries.
839
840 A toc_entry is a tuple:
841
Victor Stinner08654e12010-10-18 12:09:02 +0000842 (__file__, # value to use for __file__, available for all files,
843 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 compress, # compression kind; 0 for uncompressed
845 data_size, # size of compressed data on disk
846 file_size, # size of decompressed data
847 file_offset, # offset of file header from start of archive
848 time, # mod time of file (in dos format)
849 date, # mod data of file (in dos format)
850 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000851 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000852
853 Directories can be recognized by the trailing SEP in the name,
854 data_size and file_offset are 0.
855*/
856static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -0500857read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 PyObject *files = NULL;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500860 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000861 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800862 short compress, time, date, name_size;
863 long crc, data_size, file_size, header_size;
864 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200865 long l, count;
866 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 char name[MAXPATHLEN + 5];
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200868 char dummy[8]; /* Buffer to read unused header values into */
Victor Stinner2460a432010-08-16 17:54:28 +0000869 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800871 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100872 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000873 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000874 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000875
Benjamin Peterson34c15402014-02-16 14:17:28 -0500876 fp = _Py_fopen_obj(archive, "rb");
877 if (fp == NULL) {
878 if (!PyErr_Occurred())
879 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
880 return NULL;
881 }
882
Jesus Cea09bf7a72012-10-03 02:13:05 +0200883 if (fseek(fp, -22, SEEK_END) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500884 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +0200885 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
886 return NULL;
887 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 header_position = ftell(fp);
889 if (fread(endof_central_dir, 1, 22, fp) != 22) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500890 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400891 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 return NULL;
893 }
894 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
895 /* Bad: End of Central Dir signature */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500896 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400897 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 return NULL;
899 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 header_size = get_long((unsigned char *)endof_central_dir + 12);
902 header_offset = get_long((unsigned char *)endof_central_dir + 16);
903 arc_offset = header_position - header_offset - header_size;
904 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 files = PyDict_New();
907 if (files == NULL)
908 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 /* Start of Central Directory */
911 count = 0;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200912 if (fseek(fp, header_offset, 0) == -1)
913 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 for (;;) {
915 PyObject *t;
916 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000917
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200918 /* Start of file header */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 l = PyMarshal_ReadLongFromFile(fp);
Victor Stinner73660af2013-10-29 01:43:44 +0100920 if (l == -1 && PyErr_Occurred())
921 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 if (l != 0x02014B50)
923 break; /* Bad: Central Dir File Header */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200924
925 /* On Windows, calling fseek to skip over the fields we don't use is
926 slower than reading the data into a dummy buffer because fseek flushes
927 stdio's internal buffers. See issue #8745. */
928 if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
929 goto file_error;
930
Victor Stinnerd36c8212010-10-18 12:13:46 +0000931 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 compress = PyMarshal_ReadShortFromFile(fp);
933 time = PyMarshal_ReadShortFromFile(fp);
934 date = PyMarshal_ReadShortFromFile(fp);
935 crc = PyMarshal_ReadLongFromFile(fp);
936 data_size = PyMarshal_ReadLongFromFile(fp);
937 file_size = PyMarshal_ReadLongFromFile(fp);
938 name_size = PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200939 header_size = name_size +
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 PyMarshal_ReadShortFromFile(fp) +
941 PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200942 if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
943 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Victor Stinner73660af2013-10-29 01:43:44 +0100945 if (PyErr_Occurred())
946 goto error;
947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (name_size > MAXPATHLEN)
949 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200952 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 *p = (char)getc(fp);
954 if (*p == '/')
955 *p = SEP;
956 p++;
957 }
958 *p = 0; /* Add terminating null byte */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200959 for (; i < header_size; i++) /* Skip the rest of the header */
960 if(getc(fp) == EOF) /* Avoid fseek */
961 goto file_error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000962
Victor Stinner4ee65a92011-01-22 10:30:29 +0000963 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000964 if (flags & 0x0800)
965 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000966 else if (!PyThreadState_GET()->interp->codecs_initialized) {
967 /* During bootstrap, we may need to load the encodings
968 package from a ZIP file. But the cp437 encoding is implemented
969 in Python in the encodings package.
970
971 Break out of this dependency by assuming that the path to
972 the encodings module is ASCII-only. */
973 charset = "ascii";
974 bootstrap = 1;
975 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000976 else
977 charset = "cp437";
978 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000979 if (nameobj == NULL) {
980 if (bootstrap)
981 PyErr_Format(PyExc_NotImplementedError,
982 "bootstrap issue: python%i%i.zip contains non-ASCII "
983 "filenames without the unicode flag",
984 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000985 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000986 }
Stefan Krah000fde92012-08-20 14:14:49 +0200987 if (PyUnicode_READY(nameobj) == -1)
988 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100989 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
990 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000991 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800992 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 file_size, file_offset, time, date, crc);
994 if (t == NULL)
995 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000996 err = PyDict_SetItem(files, nameobj, t);
997 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 Py_DECREF(t);
999 if (err != 0)
1000 goto error;
1001 count++;
1002 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001003 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001005 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
1006 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 return files;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +02001008file_error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001009 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001010 Py_XDECREF(files);
1011 Py_XDECREF(nameobj);
1012 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1013 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001014error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001015 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +00001017 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001019}
1020
1021/* Return the zlib.decompress function object, or NULL if zlib couldn't
1022 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +02001023 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001024static PyObject *
1025get_decompress_func(void)
1026{
Victor Stinner4925cde2011-05-20 00:16:09 +02001027 static int importing_zlib = 0;
1028 PyObject *zlib;
1029 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001030 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001031
Victor Stinner4925cde2011-05-20 00:16:09 +02001032 if (importing_zlib != 0)
1033 /* Someone has a zlib.py[co] in their Zip file;
1034 let's avoid a stack overflow. */
1035 return NULL;
1036 importing_zlib = 1;
1037 zlib = PyImport_ImportModuleNoBlock("zlib");
1038 importing_zlib = 0;
1039 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001040 decompress = _PyObject_GetAttrId(zlib,
1041 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001042 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001044 else {
1045 PyErr_Clear();
1046 decompress = NULL;
1047 }
1048 if (Py_VerboseFlag)
1049 PySys_WriteStderr("# zipimport: zlib %s\n",
1050 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001052}
1053
Benjamin Peterson34c15402014-02-16 14:17:28 -05001054/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
Just van Rossum52e14d62002-12-30 22:08:05 +00001055 data as a new reference. */
1056static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001057get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 PyObject *raw_data, *data = NULL, *decompress;
1060 char *buf;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001061 FILE *fp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 int err;
1063 Py_ssize_t bytes_read = 0;
1064 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001065 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 long compress, data_size, file_size, file_offset, bytes_size;
1067 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001068
Victor Stinner60fe8d92010-08-16 23:48:11 +00001069 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 &data_size, &file_size, &file_offset, &time,
1071 &date, &crc)) {
1072 return NULL;
1073 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001074
Benjamin Peterson34c15402014-02-16 14:17:28 -05001075 fp = _Py_fopen_obj(archive, "rb");
1076 if (!fp) {
1077 if (!PyErr_Occurred())
1078 PyErr_Format(PyExc_IOError,
1079 "zipimport: can not open file %U", archive);
1080 return NULL;
1081 }
1082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 /* Check to make sure the local file header is correct */
Jesus Cea09bf7a72012-10-03 02:13:05 +02001084 if (fseek(fp, file_offset, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001085 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001086 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1087 return NULL;
1088 }
1089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 l = PyMarshal_ReadLongFromFile(fp);
1091 if (l != 0x04034B50) {
1092 /* Bad: Local File Header */
Victor Stinner73660af2013-10-29 01:43:44 +01001093 if (!PyErr_Occurred())
1094 PyErr_Format(ZipImportError,
1095 "bad local file header in %U",
1096 archive);
Benjamin Peterson34c15402014-02-16 14:17:28 -05001097 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 return NULL;
1099 }
Jesus Cea09bf7a72012-10-03 02:13:05 +02001100 if (fseek(fp, file_offset + 26, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001101 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001102 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1103 return NULL;
1104 }
1105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1107 PyMarshal_ReadShortFromFile(fp); /* local header size */
Victor Stinner73660af2013-10-29 01:43:44 +01001108 if (PyErr_Occurred()) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001109 fclose(fp);
Victor Stinner73660af2013-10-29 01:43:44 +01001110 return NULL;
1111 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 bytes_size = compress == 0 ? data_size : data_size + 1;
1115 if (bytes_size == 0)
1116 bytes_size++;
1117 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 if (raw_data == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001120 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 return NULL;
1122 }
1123 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 err = fseek(fp, file_offset, 0);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001126 if (err == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 bytes_read = fread(buf, 1, data_size, fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001128 } else {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001129 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001130 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1131 return NULL;
1132 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001133 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 if (err || bytes_read != data_size) {
1135 PyErr_SetString(PyExc_IOError,
1136 "zipimport: can't read data");
1137 Py_DECREF(raw_data);
1138 return NULL;
1139 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 if (compress != 0) {
1142 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1143 data_size++;
1144 }
1145 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 if (compress == 0) { /* data is not compressed */
1148 data = PyBytes_FromStringAndSize(buf, data_size);
1149 Py_DECREF(raw_data);
1150 return data;
1151 }
1152
1153 /* Decompress with zlib */
1154 decompress = get_decompress_func();
1155 if (decompress == NULL) {
1156 PyErr_SetString(ZipImportError,
1157 "can't decompress data; "
1158 "zlib not available");
1159 goto error;
1160 }
1161 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001162 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001163error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 Py_DECREF(raw_data);
1165 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001166}
1167
/* Lenient date/time comparison function.  Returns non-zero when the two
   timestamps differ by at most one second: dostime only stores even
   seconds, so the archive's mtime is coarser than the one in a .pyc. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
1180
1181/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1182 and return the code object. Return None if it the magic word doesn't
1183 match (we do this instead of raising an exception as we fall back
1184 to .py if available and we don't want to mask other errors).
1185 Returns a new reference. */
1186static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001187unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 PyObject *code;
1190 char *buf = PyBytes_AsString(data);
1191 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 if (size <= 9) {
1194 PyErr_SetString(ZipImportError,
1195 "bad pyc data");
1196 return NULL;
1197 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1200 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001201 PySys_FormatStderr("# %R has bad magic\n",
1202 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 Py_INCREF(Py_None);
1204 return Py_None; /* signal caller to try alternative */
1205 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1208 mtime)) {
1209 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001210 PySys_FormatStderr("# %R has bad mtime\n",
1211 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 Py_INCREF(Py_None);
1213 return Py_None; /* signal caller to try alternative */
1214 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001215
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001216 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1217 unimportant with zip files. */
1218 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 if (code == NULL)
1220 return NULL;
1221 if (!PyCode_Check(code)) {
1222 Py_DECREF(code);
1223 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001224 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 pathname);
1226 return NULL;
1227 }
1228 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001229}
1230
1231/* Replace any occurances of "\r\n?" in the input string with "\n".
1232 This converts DOS and Mac line endings to Unix line endings.
1233 Also append a trailing "\n" to be compatible with
1234 PyParser_SimpleParseFile(). Returns a new reference. */
1235static PyObject *
1236normalize_line_endings(PyObject *source)
1237{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001238 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 PyObject *fixed_source;
1240 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001241
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001242 p = PyBytes_AsString(source);
1243 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 return PyBytes_FromStringAndSize("\n\0", 2);
1245 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 /* one char extra for trailing \n and one for terminating \0 */
1248 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1249 if (buf == NULL) {
1250 PyErr_SetString(PyExc_MemoryError,
1251 "zipimport: no memory to allocate "
1252 "source buffer");
1253 return NULL;
1254 }
1255 /* replace "\r\n?" by "\n" */
1256 for (q = buf; *p != '\0'; p++) {
1257 if (*p == '\r') {
1258 *q++ = '\n';
1259 if (*(p + 1) == '\n')
1260 p++;
1261 }
1262 else
1263 *q++ = *p;
1264 len++;
1265 }
1266 *q++ = '\n'; /* add trailing \n */
1267 *q = '\0';
1268 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1269 PyMem_Free(buf);
1270 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001271}
1272
1273/* Given a string buffer containing Python source code, compile it
Brett Cannon83358c92013-06-20 21:30:32 -04001274 and return a code object as a new reference. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001275static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001276compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001277{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001278 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001279
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001280 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1281 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001283
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001284 fixed_source = normalize_line_endings(source);
1285 if (fixed_source == NULL) {
1286 Py_DECREF(pathbytes);
1287 return NULL;
1288 }
1289
1290 code = Py_CompileString(PyBytes_AsString(fixed_source),
1291 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001293 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_DECREF(fixed_source);
1295 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001296}
1297
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.  The
   packed DOS fields are unpacked into a struct tm and converted with
   mktime(), i.e. interpreted as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;
    int two_second_units = dostime & 0x1f;
    int minute = (dostime >> 5) & 0x3f;
    int hour = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;          /* 1-based */
    int years_since_1980 = (dosdate >> 9) & 0x7f;

    memset((void *) &fields, '\0', sizeof(fields));

    fields.tm_year = years_since_1980 + 80;     /* tm_year counts from 1900 */
    fields.tm_mon = month - 1;                  /* tm_mon is 0-based */
    fields.tm_mday = day;
    fields.tm_hour = hour;
    fields.tm_min = minute;
    fields.tm_sec = two_second_units * 2;
    fields.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&fields);
}
1317
1318/* Given a path to a .pyc or .pyo file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001319 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001320 is available. */
1321static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001322get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001323{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001324 PyObject *toc_entry, *stripped;
1325 time_t mtime;
1326
1327 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001328 if (PyUnicode_READY(path) == -1)
1329 return (time_t)-1;
1330 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1331 PyUnicode_DATA(path),
1332 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001333 if (stripped == NULL)
1334 return (time_t)-1;
1335
1336 toc_entry = PyDict_GetItem(self->files, stripped);
1337 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1339 PyTuple_Size(toc_entry) == 8) {
1340 /* fetch the time stamp of the .py file for comparison
1341 with an embedded pyc time stamp */
1342 int time, date;
1343 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1344 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1345 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001346 } else
1347 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001349}
1350
1351/* Return the code object for the module named by 'fullname' from the
1352 Zip archive as a new reference. */
1353static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001354get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001356{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001357 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001358
Benjamin Peterson34c15402014-02-16 14:17:28 -05001359 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 if (data == NULL)
1361 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001362
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001363 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001364 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001365 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001366 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001367 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 Py_DECREF(data);
1369 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001370}
1371
Ezio Melotti42da6632011-03-15 05:18:48 +02001372/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001373 'fullname'. */
1374static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001375get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001376 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001377{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001378 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001379 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001383 if (subname == NULL)
1384 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001385
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001386 path = make_filename(self->prefix, subname);
1387 Py_DECREF(subname);
1388 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001392 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001393
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001394 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1395 if (fullpath == NULL)
1396 goto exit;
1397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001399 PySys_FormatStderr("# trying %U%c%U\n",
1400 self->archive, (int)SEP, fullpath);
1401 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 if (toc_entry != NULL) {
1403 time_t mtime = 0;
1404 int ispackage = zso->type & IS_PACKAGE;
1405 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001406
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001407 if (isbytecode) {
1408 mtime = get_mtime_of_source(self, fullpath);
1409 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1410 goto exit;
1411 }
1412 }
1413 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (p_ispackage != NULL)
1415 *p_ispackage = ispackage;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001416 code = get_code_from_data(self, ispackage,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 isbytecode, mtime,
1418 toc_entry);
1419 if (code == Py_None) {
1420 /* bad magic number or non-matching mtime
1421 in byte code, try next */
1422 Py_DECREF(code);
1423 continue;
1424 }
Victor Stinner08654e12010-10-18 12:09:02 +00001425 if (code != NULL && p_modpath != NULL) {
1426 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1427 Py_INCREF(*p_modpath);
1428 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001429 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001431 else
1432 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001434 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1435exit:
1436 Py_DECREF(path);
1437 Py_XDECREF(fullpath);
1438 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001439}
1440
1441
1442/* Module init */
1443
1444PyDoc_STRVAR(zipimport_doc,
1445"zipimport provides support for importing Python modules from Zip archives.\n\
1446\n\
1447This module exports three objects:\n\
1448- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001449- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001450 subclass of ImportError, so it can be caught as ImportError, too.\n\
1451- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1452 info dicts, as used in zipimporter._files.\n\
1453\n\
1454It is usually not needed to use the zipimport module explicitly; it is\n\
1455used by the builtin import mechanism for sys.path items that are paths\n\
1456to Zip archives.");
1457
/* Module definition passed to PyModule_Create() in PyInit_zipimport().
   The initializer is positional; see the PyModuleDef documentation for
   the exact meaning of each slot. */
static struct PyModuleDef zipimportmodule = {
    PyModuleDef_HEAD_INIT,
    "zipimport",        /* module name */
    zipimport_doc,      /* module docstring */
    -1,                 /* per-module state size */
    NULL,               /* method table: none supplied here */
    NULL,               /* remaining optional slots are unused */
    NULL,
    NULL,
    NULL
};
1469
Just van Rossum52e14d62002-12-30 22:08:05 +00001470PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001471PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001472{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 if (PyType_Ready(&ZipImporter_Type) < 0)
1476 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 /* Correct directory separator */
1479 zip_searchorder[0].suffix[0] = SEP;
1480 zip_searchorder[1].suffix[0] = SEP;
1481 zip_searchorder[2].suffix[0] = SEP;
1482 if (Py_OptimizeFlag) {
1483 /* Reverse *.pyc and *.pyo */
1484 struct st_zip_searchorder tmp;
1485 tmp = zip_searchorder[0];
1486 zip_searchorder[0] = zip_searchorder[1];
1487 zip_searchorder[1] = tmp;
1488 tmp = zip_searchorder[3];
1489 zip_searchorder[3] = zip_searchorder[4];
1490 zip_searchorder[4] = tmp;
1491 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 mod = PyModule_Create(&zipimportmodule);
1494 if (mod == NULL)
1495 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1498 PyExc_ImportError, NULL);
1499 if (ZipImportError == NULL)
1500 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 Py_INCREF(ZipImportError);
1503 if (PyModule_AddObject(mod, "ZipImportError",
1504 ZipImportError) < 0)
1505 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 Py_INCREF(&ZipImporter_Type);
1508 if (PyModule_AddObject(mod, "zipimporter",
1509 (PyObject *)&ZipImporter_Type) < 0)
1510 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 zip_directory_cache = PyDict_New();
1513 if (zip_directory_cache == NULL)
1514 return NULL;
1515 Py_INCREF(zip_directory_cache);
1516 if (PyModule_AddObject(mod, "_zip_directory_cache",
1517 zip_directory_cache) < 0)
1518 return NULL;
1519 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001520}