blob: 06abb312b35f3b771a76547190cc1a69e0b42582 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix, paired with the IS_* flags that
   describe what a matching archive entry represents. */
struct st_zip_searchorder {
    char suffix[14];    /* holds "/__init__.pyc" plus the trailing NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
Victor Stinner651f9f72013-11-12 21:44:18 +010017#ifdef ALTSEP
18_Py_IDENTIFIER(replace);
19#endif
20
Just van Rossum52e14d62002-12-30 22:08:05 +000021/* zip_searchorder defines how we search for a module in the Zip
22 archive: we first search for a package __init__, then for
Brett Cannonf299abd2015-04-13 14:21:02 -040023 non-package .pyc, and .py entries. The .pyc entries
Just van Rossum52e14d62002-12-30 22:08:05 +000024 are swapped by initzipimport() if we run in optimized mode. Also,
25 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000026static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000027 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000028 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
29 {".pyc", IS_BYTECODE},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000030 {".py", IS_SOURCE},
31 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000032};
33
34/* zipimporter object definition and support */
35
36typedef struct _zipimporter ZipImporter;
37
38struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000040 PyObject *archive; /* pathname of the Zip archive,
41 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000042 PyObject *prefix; /* file prefix: "a/sub/directory/",
43 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000044 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000045};
46
Just van Rossum52e14d62002-12-30 22:08:05 +000047static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000048/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000049static PyObject *zip_directory_cache = NULL;
50
51/* forward decls */
Benjamin Peterson34c15402014-02-16 14:17:28 -050052static PyObject *read_directory(PyObject *archive);
53static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040054static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000055 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000056
57
58#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
59
60
61/* zipimporter.__init__
62 Split the "subdirectory" from the Zip archive path, lookup a matching
63 entry in sys.path_importer_cache, fetch the file directory from there
64 if found, or else read it from the archive. */
65static int
66zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
67{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010068 PyObject *path, *files, *tmp;
69 PyObject *filename = NULL;
70 Py_ssize_t len, flen;
Just van Rossum52e14d62002-12-30 22:08:05 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 if (!_PyArg_NoKeywords("zipimporter()", kwds))
73 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000074
Victor Stinner2b8dab72010-08-14 14:54:10 +000075 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010076 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000078
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010079 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020080 return -1;
81
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010082 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 if (len == 0) {
84 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000085 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 }
Just van Rossum52e14d62002-12-30 22:08:05 +000087
88#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010089 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010090 if (!tmp)
91 goto error;
92 Py_DECREF(path);
93 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000094#endif
95
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010096 filename = path;
97 Py_INCREF(filename);
98 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 for (;;) {
100 struct stat statbuf;
101 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000102
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100103 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100104 if (rv == -2)
105 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 if (rv == 0) {
107 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100108 if (!S_ISREG(statbuf.st_mode))
109 /* it's a not file */
110 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 break;
112 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100113 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100115 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
116 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100118 filename = PyUnicode_Substring(path, 0, flen);
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100119 if (filename == NULL)
120 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100122 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000124 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000126
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100127 if (PyUnicode_READY(filename) < 0)
128 goto error;
129
130 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000131 if (files == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500132 files = read_directory(filename);
133 if (files == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000134 goto error;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500135 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000136 goto error;
137 }
138 else
139 Py_INCREF(files);
140 self->files = files;
141
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100142 /* Transfer reference */
143 self->archive = filename;
144 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000145
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100146 /* Check if there is a prefix directory following the filename. */
147 if (flen != len) {
148 tmp = PyUnicode_Substring(path, flen+1,
149 PyUnicode_GET_LENGTH(path));
150 if (tmp == NULL)
151 goto error;
152 self->prefix = tmp;
153 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100155 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
156 if (tmp == NULL)
157 goto error;
158 Py_DECREF(self->prefix);
159 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000162 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100163 self->prefix = PyUnicode_New(0, 0);
164 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000166
167error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100168 Py_DECREF(path);
169 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000170 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000171}
172
173/* GC support. */
174static int
175zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 ZipImporter *self = (ZipImporter *)obj;
178 Py_VISIT(self->files);
179 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000180}
181
182static void
183zipimporter_dealloc(ZipImporter *self)
184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 PyObject_GC_UnTrack(self);
186 Py_XDECREF(self->archive);
187 Py_XDECREF(self->prefix);
188 Py_XDECREF(self->files);
189 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190}
191
192static PyObject *
193zipimporter_repr(ZipImporter *self)
194{
Victor Stinner028dd972010-08-17 00:04:48 +0000195 if (self->archive == NULL)
196 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200197 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000198 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000199 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 else
Victor Stinner07298a12010-10-18 22:45:54 +0000201 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000202 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000203}
204
205/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400206static PyObject *
207get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000208{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100209 Py_ssize_t len, dot;
210 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200211 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100212 len = PyUnicode_GET_LENGTH(fullname);
213 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
214 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400215 Py_INCREF(fullname);
216 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100217 } else
218 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000219}
220
221/* Given a (sub)modulename, write the potential file path in the
222 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400223 length of the resulting string.
224
225 return self.prefix + name.replace('.', os.sep) */
226static PyObject*
227make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000228{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400229 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200230 Py_UCS4 *p, *buf;
231 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000232
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200233 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
Serhiy Storchaka1a1ff292015-02-16 13:28:22 +0200234 p = buf = PyMem_New(Py_UCS4, len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200235 if (buf == NULL) {
236 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400237 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200238 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000239
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200240 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
241 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200242 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200243 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200244 p += PyUnicode_GET_LENGTH(prefix);
245 len -= PyUnicode_GET_LENGTH(prefix);
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200246 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
247 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200248 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200249 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400250 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 if (*p == '.')
252 *p = SEP;
253 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200254 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
255 buf, p-buf);
256 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400257 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000258}
259
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* a helper call failed while building the lookup path */
    MI_NOT_FOUND,   /* no zip_searchorder suffix matched an archive entry */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched a package __init__ entry */
};
266
Eric V. Smith984b11f2012-05-24 20:21:04 -0400267/* Does this path represent a directory?
268 on error, return < 0
269 if not a dir, return 0
270 if a dir, return 1
271*/
272static int
273check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
274{
275 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700276 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400277
278 /* See if this is a "directory". If so, it's eligible to be part
279 of a namespace package. We test by seeing if the name, with an
280 appended path separator, exists. */
281 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
282 if (dirpath == NULL)
283 return -1;
284 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700285 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400286 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700287 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400288}
289
Just van Rossum52e14d62002-12-30 22:08:05 +0000290/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000291static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400292get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000293{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400294 PyObject *subname;
295 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297
Victor Stinner965a8a12010-10-18 21:44:33 +0000298 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400299 if (subname == NULL)
300 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000301
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400302 path = make_filename(self->prefix, subname);
303 Py_DECREF(subname);
304 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000306
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400308 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
309 if (fullpath == NULL) {
310 Py_DECREF(path);
311 return MI_ERROR;
312 }
313 item = PyDict_GetItem(self->files, fullpath);
314 Py_DECREF(fullpath);
315 if (item != NULL) {
316 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (zso->type & IS_PACKAGE)
318 return MI_PACKAGE;
319 else
320 return MI_MODULE;
321 }
322 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400323 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000324 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000325}
326
/* Result of find_loader(). */
typedef enum {
    FL_ERROR,           /* lookup failed */
    FL_NOT_FOUND,       /* no module, package or namespace portion */
    FL_MODULE_FOUND,    /* a module or regular package was found */
    FL_NS_FOUND         /* a directory (possible namespace portion) was found */
} find_loader_result;
333
Eric V. Smith984b11f2012-05-24 20:21:04 -0400334/* The guts of "find_loader" and "find_module". Return values:
335 -1: error
336 0: no loader or namespace portions found
337 1: module/package found
338 2: namespace portion found: *namespace_portion will point to the name
339*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700340static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400341find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
342{
343 enum zi_module_info mi;
344
345 *namespace_portion = NULL;
346
347 mi = get_module_info(self, fullname);
348 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700349 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400350 if (mi == MI_NOT_FOUND) {
351 /* Not a module or regular package. See if this is a directory, and
352 therefore possibly a portion of a namespace package. */
353 int is_dir = check_is_directory(self, self->prefix, fullname);
354 if (is_dir < 0)
355 return -1;
356 if (is_dir) {
357 /* This is possibly a portion of a namespace
358 package. Return the string representing its path,
359 without a trailing separator. */
360 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
361 self->archive, SEP,
362 self->prefix, fullname);
363 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700364 return FL_ERROR;
365 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400366 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700367 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400368 }
369 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700370 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400371}
372
373
Just van Rossum52e14d62002-12-30 22:08:05 +0000374/* Check whether we can satisfy the import of the module named by
375 'fullname'. Return self if we can, None if we can't. */
376static PyObject *
377zipimporter_find_module(PyObject *obj, PyObject *args)
378{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 ZipImporter *self = (ZipImporter *)obj;
380 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400381 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700382 PyObject *namespace_portion = NULL;
383 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000384
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700385 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
386 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000387
Eric V. Smith984b11f2012-05-24 20:21:04 -0400388 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700389 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700390 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700391 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700392 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400393 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700394 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700395 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700396 result = Py_None;
397 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700398 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700399 result = (PyObject *)self;
400 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700402 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700403 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400404}
405
406
407/* Check whether we can satisfy the import of the module named by
408 'fullname', or whether it could be a portion of a namespace
409 package. Return self if we can load it, a string containing the
410 full path if it's a possible namespace portion, None if we
411 can't load it. */
412static PyObject *
413zipimporter_find_loader(PyObject *obj, PyObject *args)
414{
415 ZipImporter *self = (ZipImporter *)obj;
416 PyObject *path = NULL;
417 PyObject *fullname;
418 PyObject *result = NULL;
419 PyObject *namespace_portion = NULL;
420
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700421 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
422 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400423
424 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700425 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700426 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700427 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700428 result = Py_BuildValue("O[]", Py_None);
429 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700430 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700431 result = Py_BuildValue("O[]", self);
432 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700433 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700434 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700435 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400436 return result;
437 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700438 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000439}
440
441/* Load and return the module named by 'fullname'. */
442static PyObject *
443zipimporter_load_module(PyObject *obj, PyObject *args)
444{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000446 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400447 PyObject *fullname;
448 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000450
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400451 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 &fullname))
453 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 if (PyUnicode_READY(fullname) == -1)
455 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 code = get_module_code(self, fullname, &ispackage, &modpath);
458 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000459 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000460
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400461 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000462 if (mod == NULL)
463 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 /* mod.__loader__ = self */
467 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
468 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 if (ispackage) {
471 /* add __path__ to the module *before* the code gets
472 executed */
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100473 PyObject *pkgpath, *fullpath, *subname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000475
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100476 subname = get_subname(fullname);
477 if (subname == NULL)
478 goto error;
479
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400480 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 self->archive, SEP,
482 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400483 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 if (fullpath == NULL)
485 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000486
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400487 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 if (pkgpath == NULL)
489 goto error;
490 err = PyDict_SetItemString(dict, "__path__", pkgpath);
491 Py_DECREF(pkgpath);
492 if (err != 0)
493 goto error;
494 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400495 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000496 Py_CLEAR(code);
497 if (mod == NULL)
498 goto error;
499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400501 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000502 fullname, modpath);
503 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000505error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000506 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000507 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000509}
510
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000511/* Return a string matching __file__ for the named module */
512static PyObject *
513zipimporter_get_filename(PyObject *obj, PyObject *args)
514{
515 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400516 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000517 int ispackage;
518
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400519 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000520 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000521 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000522
523 /* Deciding the filename requires working out where the code
524 would come from if the module was actually loaded */
525 code = get_module_code(self, fullname, &ispackage, &modpath);
526 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000527 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000528 Py_DECREF(code); /* Only need the path info */
529
Victor Stinner08654e12010-10-18 12:09:02 +0000530 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000531}
532
Just van Rossum52e14d62002-12-30 22:08:05 +0000533/* Return a bool signifying whether the module is a package or not. */
534static PyObject *
535zipimporter_is_package(PyObject *obj, PyObject *args)
536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400538 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000540
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400541 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000542 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 mi = get_module_info(self, fullname);
546 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000547 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400549 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000550 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 }
552 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000553}
554
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200555
Just van Rossum52e14d62002-12-30 22:08:05 +0000556static PyObject *
557zipimporter_get_data(PyObject *obj, PyObject *args)
558{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100560 PyObject *path, *key;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500561 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100562 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000563
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100564 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000566
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200567#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100568 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100569 if (!path)
570 return NULL;
571#else
572 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000573#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100574 if (PyUnicode_READY(path) == -1)
575 goto error;
576
577 path_len = PyUnicode_GET_LENGTH(path);
578
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200579 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100580 path_start = 0;
581 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
582 && PyUnicode_READ_CHAR(path, len) == SEP) {
583 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000585
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100586 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000587 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100588 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000589 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000591 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
592 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100593 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000595 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100596 Py_DECREF(path);
Benjamin Peterson34c15402014-02-16 14:17:28 -0500597 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100598 error:
599 Py_DECREF(path);
600 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000601}
602
603static PyObject *
604zipimporter_get_code(PyObject *obj, PyObject *args)
605{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400607 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000608
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400609 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000613}
614
615static PyObject *
616zipimporter_get_source(PyObject *obj, PyObject *args)
617{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 ZipImporter *self = (ZipImporter *)obj;
619 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400620 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000622
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400623 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000624 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000627 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000629 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400630 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000631 return NULL;
632 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400633
Victor Stinner965a8a12010-10-18 21:44:33 +0000634 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400635 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000637
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400638 path = make_filename(self->prefix, subname);
639 Py_DECREF(subname);
640 if (path == NULL)
641 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000642
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400643 if (mi == MI_PACKAGE)
644 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
645 else
646 fullpath = PyUnicode_FromFormat("%U.py", path);
647 Py_DECREF(path);
648 if (fullpath == NULL)
649 return NULL;
650
651 toc_entry = PyDict_GetItem(self->files, fullpath);
652 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000654 PyObject *res, *bytes;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500655 bytes = get_data(self->archive, toc_entry);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000656 if (bytes == NULL)
657 return NULL;
658 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
659 PyBytes_GET_SIZE(bytes));
660 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 return res;
662 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 /* we have the module, but no source */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500665 Py_INCREF(Py_None);
666 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000667}
668
669PyDoc_STRVAR(doc_find_module,
670"find_module(fullname, path=None) -> self or None.\n\
671\n\
672Search for a module specified by 'fullname'. 'fullname' must be the\n\
673fully qualified (dotted) module name. It returns the zipimporter\n\
674instance itself if the module was found, or None if it wasn't.\n\
675The optional 'path' argument is ignored -- it's there for compatibility\n\
676with the importer protocol.");
677
Eric V. Smith984b11f2012-05-24 20:21:04 -0400678PyDoc_STRVAR(doc_find_loader,
679"find_loader(fullname, path=None) -> self, str or None.\n\
680\n\
681Search for a module specified by 'fullname'. 'fullname' must be the\n\
682fully qualified (dotted) module name. It returns the zipimporter\n\
683instance itself if the module was found, a string containing the\n\
684full path name if it's possibly a portion of a namespace package,\n\
685or None otherwise. The optional 'path' argument is ignored -- it's\n\
686 there for compatibility with the importer protocol.");
687
Just van Rossum52e14d62002-12-30 22:08:05 +0000688PyDoc_STRVAR(doc_load_module,
689"load_module(fullname) -> module.\n\
690\n\
691Load the module specified by 'fullname'. 'fullname' must be the\n\
692fully qualified (dotted) module name. It returns the imported\n\
693module, or raises ZipImportError if it wasn't found.");
694
695PyDoc_STRVAR(doc_get_data,
696"get_data(pathname) -> string with file data.\n\
697\n\
698Return the data associated with 'pathname'. Raise IOError if\n\
699the file wasn't found.");
700
701PyDoc_STRVAR(doc_is_package,
702"is_package(fullname) -> bool.\n\
703\n\
704Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000705Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000706
707PyDoc_STRVAR(doc_get_code,
708"get_code(fullname) -> code object.\n\
709\n\
710Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000711if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000712
713PyDoc_STRVAR(doc_get_source,
714"get_source(fullname) -> source string.\n\
715\n\
716Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000717if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000718contain the module, but has no source for it.");
719
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000720
721PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000722"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000723\n\
724Return the filename for the specified module.");
725
Just van Rossum52e14d62002-12-30 22:08:05 +0000726static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 {"find_module", zipimporter_find_module, METH_VARARGS,
728 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400729 {"find_loader", zipimporter_find_loader, METH_VARARGS,
730 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 {"load_module", zipimporter_load_module, METH_VARARGS,
732 doc_load_module},
733 {"get_data", zipimporter_get_data, METH_VARARGS,
734 doc_get_data},
735 {"get_code", zipimporter_get_code, METH_VARARGS,
736 doc_get_code},
737 {"get_source", zipimporter_get_source, METH_VARARGS,
738 doc_get_source},
739 {"get_filename", zipimporter_get_filename, METH_VARARGS,
740 doc_get_filename},
741 {"is_package", zipimporter_is_package, METH_VARARGS,
742 doc_is_package},
743 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000744};
745
746static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
748 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
749 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
750 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000751};
752
753PyDoc_STRVAR(zipimporter_doc,
754"zipimporter(archivepath) -> zipimporter object\n\
755\n\
756Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000757a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
758'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
759valid directory inside the archive.\n\
760\n\
761'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
762archive.\n\
763\n\
764The 'archive' attribute of zipimporter objects contains the name of the\n\
765zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000766
/* Placeholder used for the type slot of the static type object below: the
   macro discards its argument and expands to 0, so the initializer does
   not take the address of PyType_Type.
   NOTE(review): the real type pointer is presumably filled in when the
   module is initialised -- confirm against the module init code. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for "zipimport.zipimporter".  The initializer is positional:
   each value lands in the slot named by the comment next to it, so entries
   must never be reordered, added or dropped without checking the
   PyTypeObject layout.  Slots set to 0 are left unset here. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",                    /* tp_name */
    sizeof(ZipImporter),                        /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* instances may be subclassed and take part in cyclic GC */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
811
812
813/* implementation */
814
/* Given a buffer, return the long that is represented by the first
   4 bytes, decoded as a little-endian two's-complement 32-bit value
   (so a set top bit yields a negative result).  This partially
   reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long: shifting a byte >= 0x80
   left by 24 in a signed 32-bit long would overflow, which is undefined
   behaviour.  Sign extension is then done arithmetically, so the result
   does not depend on the width of long or on any configuration macro. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* Negative value, i.e. bits - 2**32.  (~bits & 0xFFFFFFFF) equals
           2**32 - 1 - bits and always fits in a long, so negating it and
           subtracting 1 cannot overflow. */
        return -(long)(~bits & 0xFFFFFFFFUL) - 1;
    }
    return (long)bits;
}
831
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800832/*
Benjamin Peterson34c15402014-02-16 14:17:28 -0500833 read_directory(archive) -> files dict (new reference)
Gregory P. Smith2bcbc142014-01-07 18:30:07 -0800834
Benjamin Peterson34c15402014-02-16 14:17:28 -0500835 Given a path to a Zip archive, build a dict, mapping file names
Just van Rossum52e14d62002-12-30 22:08:05 +0000836 (local to the archive, using SEP as a separator) to toc entries.
837
838 A toc_entry is a tuple:
839
Victor Stinner08654e12010-10-18 12:09:02 +0000840 (__file__, # value to use for __file__, available for all files,
841 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 compress, # compression kind; 0 for uncompressed
843 data_size, # size of compressed data on disk
844 file_size, # size of decompressed data
845 file_offset, # offset of file header from start of archive
846 time, # mod time of file (in dos format)
847 date, # mod data of file (in dos format)
848 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000849 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000850
851 Directories can be recognized by the trailing SEP in the name,
852 data_size and file_offset are 0.
853*/
854static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -0500855read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 PyObject *files = NULL;
Benjamin Peterson34c15402014-02-16 14:17:28 -0500858 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000859 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800860 short compress, time, date, name_size;
861 long crc, data_size, file_size, header_size;
862 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200863 long l, count;
864 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 char name[MAXPATHLEN + 5];
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200866 char dummy[8]; /* Buffer to read unused header values into */
Victor Stinner2460a432010-08-16 17:54:28 +0000867 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800869 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100870 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000871 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000872 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000873
Benjamin Peterson34c15402014-02-16 14:17:28 -0500874 fp = _Py_fopen_obj(archive, "rb");
875 if (fp == NULL) {
Victor Stinnerfbd6f9e2015-03-20 10:52:25 +0100876 if (PyErr_ExceptionMatches(PyExc_OSError)) {
877 PyObject *exc, *val, *tb;
878 PyErr_Fetch(&exc, &val, &tb);
Benjamin Peterson34c15402014-02-16 14:17:28 -0500879 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
Victor Stinnerfbd6f9e2015-03-20 10:52:25 +0100880 _PyErr_ChainExceptions(exc, val, tb);
881 }
Benjamin Peterson34c15402014-02-16 14:17:28 -0500882 return NULL;
883 }
884
Jesus Cea09bf7a72012-10-03 02:13:05 +0200885 if (fseek(fp, -22, SEEK_END) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500886 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +0200887 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
888 return NULL;
889 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 header_position = ftell(fp);
891 if (fread(endof_central_dir, 1, 22, fp) != 22) {
Benjamin Peterson34c15402014-02-16 14:17:28 -0500892 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400893 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 return NULL;
895 }
896 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
897 /* Bad: End of Central Dir signature */
Benjamin Peterson34c15402014-02-16 14:17:28 -0500898 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400899 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 return NULL;
901 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 header_size = get_long((unsigned char *)endof_central_dir + 12);
904 header_offset = get_long((unsigned char *)endof_central_dir + 16);
905 arc_offset = header_position - header_offset - header_size;
906 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 files = PyDict_New();
909 if (files == NULL)
910 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 /* Start of Central Directory */
913 count = 0;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200914 if (fseek(fp, header_offset, 0) == -1)
915 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 for (;;) {
917 PyObject *t;
918 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000919
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200920 /* Start of file header */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 l = PyMarshal_ReadLongFromFile(fp);
Victor Stinner73660af2013-10-29 01:43:44 +0100922 if (l == -1 && PyErr_Occurred())
923 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 if (l != 0x02014B50)
925 break; /* Bad: Central Dir File Header */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200926
927 /* On Windows, calling fseek to skip over the fields we don't use is
928 slower than reading the data into a dummy buffer because fseek flushes
929 stdio's internal buffers. See issue #8745. */
930 if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
931 goto file_error;
932
Victor Stinnerd36c8212010-10-18 12:13:46 +0000933 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 compress = PyMarshal_ReadShortFromFile(fp);
935 time = PyMarshal_ReadShortFromFile(fp);
936 date = PyMarshal_ReadShortFromFile(fp);
937 crc = PyMarshal_ReadLongFromFile(fp);
938 data_size = PyMarshal_ReadLongFromFile(fp);
939 file_size = PyMarshal_ReadLongFromFile(fp);
940 name_size = PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200941 header_size = name_size +
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 PyMarshal_ReadShortFromFile(fp) +
943 PyMarshal_ReadShortFromFile(fp);
Victor Stinner5200f552015-03-18 13:56:25 +0100944 if (PyErr_Occurred())
945 goto error;
946
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200947 if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
948 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Victor Stinner73660af2013-10-29 01:43:44 +0100950 if (PyErr_Occurred())
951 goto error;
952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 if (name_size > MAXPATHLEN)
954 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200957 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 *p = (char)getc(fp);
959 if (*p == '/')
960 *p = SEP;
961 p++;
962 }
963 *p = 0; /* Add terminating null byte */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200964 for (; i < header_size; i++) /* Skip the rest of the header */
965 if(getc(fp) == EOF) /* Avoid fseek */
966 goto file_error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000967
Victor Stinner4ee65a92011-01-22 10:30:29 +0000968 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000969 if (flags & 0x0800)
970 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000971 else if (!PyThreadState_GET()->interp->codecs_initialized) {
972 /* During bootstrap, we may need to load the encodings
973 package from a ZIP file. But the cp437 encoding is implemented
974 in Python in the encodings package.
975
976 Break out of this dependency by assuming that the path to
977 the encodings module is ASCII-only. */
978 charset = "ascii";
979 bootstrap = 1;
980 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000981 else
982 charset = "cp437";
983 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000984 if (nameobj == NULL) {
985 if (bootstrap)
986 PyErr_Format(PyExc_NotImplementedError,
987 "bootstrap issue: python%i%i.zip contains non-ASCII "
988 "filenames without the unicode flag",
989 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000990 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000991 }
Stefan Krah000fde92012-08-20 14:14:49 +0200992 if (PyUnicode_READY(nameobj) == -1)
993 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100994 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
995 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000996 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800997 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 file_size, file_offset, time, date, crc);
999 if (t == NULL)
1000 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +00001001 err = PyDict_SetItem(files, nameobj, t);
1002 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 Py_DECREF(t);
1004 if (err != 0)
1005 goto error;
1006 count++;
1007 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001008 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001010 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
1011 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 return files;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +02001013file_error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001014 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001015 Py_XDECREF(files);
1016 Py_XDECREF(nameobj);
1017 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1018 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001019error:
Benjamin Peterson34c15402014-02-16 14:17:28 -05001020 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +00001022 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001024}
1025
1026/* Return the zlib.decompress function object, or NULL if zlib couldn't
1027 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +02001028 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001029static PyObject *
1030get_decompress_func(void)
1031{
Victor Stinner4925cde2011-05-20 00:16:09 +02001032 static int importing_zlib = 0;
1033 PyObject *zlib;
1034 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001035 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001036
Victor Stinner4925cde2011-05-20 00:16:09 +02001037 if (importing_zlib != 0)
1038 /* Someone has a zlib.py[co] in their Zip file;
1039 let's avoid a stack overflow. */
1040 return NULL;
1041 importing_zlib = 1;
1042 zlib = PyImport_ImportModuleNoBlock("zlib");
1043 importing_zlib = 0;
1044 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001045 decompress = _PyObject_GetAttrId(zlib,
1046 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001047 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001049 else {
1050 PyErr_Clear();
1051 decompress = NULL;
1052 }
1053 if (Py_VerboseFlag)
1054 PySys_WriteStderr("# zipimport: zlib %s\n",
1055 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001057}
1058
Benjamin Peterson34c15402014-02-16 14:17:28 -05001059/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
Just van Rossum52e14d62002-12-30 22:08:05 +00001060 data as a new reference. */
1061static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001062get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001063{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 PyObject *raw_data, *data = NULL, *decompress;
1065 char *buf;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001066 FILE *fp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 int err;
1068 Py_ssize_t bytes_read = 0;
1069 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001070 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 long compress, data_size, file_size, file_offset, bytes_size;
1072 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001073
Victor Stinner60fe8d92010-08-16 23:48:11 +00001074 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 &data_size, &file_size, &file_offset, &time,
1076 &date, &crc)) {
1077 return NULL;
1078 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001079
Benjamin Peterson34c15402014-02-16 14:17:28 -05001080 fp = _Py_fopen_obj(archive, "rb");
Victor Stinnere42ccd22015-03-18 01:39:23 +01001081 if (!fp)
Benjamin Peterson34c15402014-02-16 14:17:28 -05001082 return NULL;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 /* Check to make sure the local file header is correct */
Jesus Cea09bf7a72012-10-03 02:13:05 +02001085 if (fseek(fp, file_offset, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001086 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001087 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1088 return NULL;
1089 }
1090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 l = PyMarshal_ReadLongFromFile(fp);
1092 if (l != 0x04034B50) {
1093 /* Bad: Local File Header */
Victor Stinner73660af2013-10-29 01:43:44 +01001094 if (!PyErr_Occurred())
1095 PyErr_Format(ZipImportError,
1096 "bad local file header in %U",
1097 archive);
Benjamin Peterson34c15402014-02-16 14:17:28 -05001098 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 return NULL;
1100 }
Jesus Cea09bf7a72012-10-03 02:13:05 +02001101 if (fseek(fp, file_offset + 26, 0) == -1) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001102 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001103 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1104 return NULL;
1105 }
1106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1108 PyMarshal_ReadShortFromFile(fp); /* local header size */
Victor Stinner73660af2013-10-29 01:43:44 +01001109 if (PyErr_Occurred()) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001110 fclose(fp);
Victor Stinner73660af2013-10-29 01:43:44 +01001111 return NULL;
1112 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 bytes_size = compress == 0 ? data_size : data_size + 1;
1116 if (bytes_size == 0)
1117 bytes_size++;
1118 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 if (raw_data == NULL) {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001121 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 return NULL;
1123 }
1124 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 err = fseek(fp, file_offset, 0);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001127 if (err == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 bytes_read = fread(buf, 1, data_size, fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001129 } else {
Benjamin Peterson34c15402014-02-16 14:17:28 -05001130 fclose(fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001131 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1132 return NULL;
1133 }
Benjamin Peterson34c15402014-02-16 14:17:28 -05001134 fclose(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (err || bytes_read != data_size) {
1136 PyErr_SetString(PyExc_IOError,
1137 "zipimport: can't read data");
1138 Py_DECREF(raw_data);
1139 return NULL;
1140 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 if (compress != 0) {
1143 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1144 data_size++;
1145 }
1146 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 if (compress == 0) { /* data is not compressed */
1149 data = PyBytes_FromStringAndSize(buf, data_size);
1150 Py_DECREF(raw_data);
1151 return data;
1152 }
1153
1154 /* Decompress with zlib */
1155 decompress = get_decompress_func();
1156 if (decompress == NULL) {
1157 PyErr_SetString(ZipImportError,
1158 "can't decompress data; "
1159 "zlib not available");
1160 goto error;
1161 }
1162 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001163 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001164error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 Py_DECREF(raw_data);
1166 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001167}
1168
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: two time
   stamps are treated as equal when they differ by at most one second.
   Returns 1 for "equal", 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* dostime only stores even seconds, so be lenient */
    time_t gap = t1 - t2;

    return (gap < 0 ? -gap : gap) <= 1;
}
1181
1182/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1183 and return the code object. Return None if it the magic word doesn't
1184 match (we do this instead of raising an exception as we fall back
1185 to .py if available and we don't want to mask other errors).
1186 Returns a new reference. */
1187static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001188unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001189{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 PyObject *code;
1191 char *buf = PyBytes_AsString(data);
1192 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 if (size <= 9) {
1195 PyErr_SetString(ZipImportError,
1196 "bad pyc data");
1197 return NULL;
1198 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1201 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001202 PySys_FormatStderr("# %R has bad magic\n",
1203 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 Py_INCREF(Py_None);
1205 return Py_None; /* signal caller to try alternative */
1206 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1209 mtime)) {
1210 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001211 PySys_FormatStderr("# %R has bad mtime\n",
1212 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 Py_INCREF(Py_None);
1214 return Py_None; /* signal caller to try alternative */
1215 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001216
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001217 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1218 unimportant with zip files. */
1219 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 if (code == NULL)
1221 return NULL;
1222 if (!PyCode_Check(code)) {
1223 Py_DECREF(code);
1224 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001225 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 pathname);
1227 return NULL;
1228 }
1229 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001230}
1231
1232/* Replace any occurances of "\r\n?" in the input string with "\n".
1233 This converts DOS and Mac line endings to Unix line endings.
1234 Also append a trailing "\n" to be compatible with
1235 PyParser_SimpleParseFile(). Returns a new reference. */
1236static PyObject *
1237normalize_line_endings(PyObject *source)
1238{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001239 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 PyObject *fixed_source;
1241 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001242
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001243 p = PyBytes_AsString(source);
1244 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 return PyBytes_FromStringAndSize("\n\0", 2);
1246 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 /* one char extra for trailing \n and one for terminating \0 */
1249 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1250 if (buf == NULL) {
1251 PyErr_SetString(PyExc_MemoryError,
1252 "zipimport: no memory to allocate "
1253 "source buffer");
1254 return NULL;
1255 }
1256 /* replace "\r\n?" by "\n" */
1257 for (q = buf; *p != '\0'; p++) {
1258 if (*p == '\r') {
1259 *q++ = '\n';
1260 if (*(p + 1) == '\n')
1261 p++;
1262 }
1263 else
1264 *q++ = *p;
1265 len++;
1266 }
1267 *q++ = '\n'; /* add trailing \n */
1268 *q = '\0';
1269 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1270 PyMem_Free(buf);
1271 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001272}
1273
1274/* Given a string buffer containing Python source code, compile it
Brett Cannon83358c92013-06-20 21:30:32 -04001275 and return a code object as a new reference. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001276static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001277compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001278{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001279 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001280
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001281 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1282 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001284
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001285 fixed_source = normalize_line_endings(source);
1286 if (fixed_source == NULL) {
1287 Py_DECREF(pathbytes);
1288 return NULL;
1289 }
1290
1291 code = Py_CompileString(PyBytes_AsString(fixed_source),
1292 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001294 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_DECREF(fixed_source);
1296 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001297}
1298
/* Translate the packed MS-DOS time and date words found in a Zip
   directory entry into a time_t that can be compared with the time
   stamp stored in .pyc files.  The broken-down fields are handed to
   mktime(), which interprets them as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm parts;

    memset(&parts, 0, sizeof(parts));

    /* date word: day in bits 0-4, month (1-12) in bits 5-8,
       years since 1980 in bits 9-15 */
    parts.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    parts.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    parts.tm_mday = dosdate & 0x1f;

    /* time word: seconds/2 in bits 0-4, minutes in bits 5-10,
       hours in bits 11-15 */
    parts.tm_hour = (dostime >> 11) & 0x1f;
    parts.tm_min = (dostime >> 5) & 0x3f;
    parts.tm_sec = (dostime & 0x1f) * 2;

    /* DST status unknown; tm_wday/tm_yday are ignored by mktime() */
    parts.tm_isdst = -1;

    return mktime(&parts);
}
1318
Brett Cannonf299abd2015-04-13 14:21:02 -04001319/* Given a path to a .pyc file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001320 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001321 is available. */
1322static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001323get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001324{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001325 PyObject *toc_entry, *stripped;
1326 time_t mtime;
1327
1328 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001329 if (PyUnicode_READY(path) == -1)
1330 return (time_t)-1;
1331 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1332 PyUnicode_DATA(path),
1333 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001334 if (stripped == NULL)
1335 return (time_t)-1;
1336
1337 toc_entry = PyDict_GetItem(self->files, stripped);
1338 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1340 PyTuple_Size(toc_entry) == 8) {
1341 /* fetch the time stamp of the .py file for comparison
1342 with an embedded pyc time stamp */
1343 int time, date;
1344 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1345 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1346 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001347 } else
1348 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001350}
1351
1352/* Return the code object for the module named by 'fullname' from the
1353 Zip archive as a new reference. */
1354static PyObject *
Benjamin Peterson34c15402014-02-16 14:17:28 -05001355get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001357{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001358 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001359
Benjamin Peterson34c15402014-02-16 14:17:28 -05001360 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 if (data == NULL)
1362 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001363
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001364 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001365 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001366 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001367 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001368 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 Py_DECREF(data);
1370 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001371}
1372
Ezio Melotti42da6632011-03-15 05:18:48 +02001373/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001374 'fullname'. */
1375static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001376get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001377 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001378{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001379 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001380 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001384 if (subname == NULL)
1385 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001386
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001387 path = make_filename(self->prefix, subname);
1388 Py_DECREF(subname);
1389 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001393 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001394
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001395 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1396 if (fullpath == NULL)
1397 goto exit;
1398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001400 PySys_FormatStderr("# trying %U%c%U\n",
1401 self->archive, (int)SEP, fullpath);
1402 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 if (toc_entry != NULL) {
1404 time_t mtime = 0;
1405 int ispackage = zso->type & IS_PACKAGE;
1406 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001407
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001408 if (isbytecode) {
1409 mtime = get_mtime_of_source(self, fullpath);
1410 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1411 goto exit;
1412 }
1413 }
1414 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (p_ispackage != NULL)
1416 *p_ispackage = ispackage;
Benjamin Peterson34c15402014-02-16 14:17:28 -05001417 code = get_code_from_data(self, ispackage,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 isbytecode, mtime,
1419 toc_entry);
1420 if (code == Py_None) {
1421 /* bad magic number or non-matching mtime
1422 in byte code, try next */
1423 Py_DECREF(code);
1424 continue;
1425 }
Victor Stinner08654e12010-10-18 12:09:02 +00001426 if (code != NULL && p_modpath != NULL) {
1427 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1428 Py_INCREF(*p_modpath);
1429 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001430 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001432 else
1433 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001435 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1436exit:
1437 Py_DECREF(path);
1438 Py_XDECREF(fullpath);
1439 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001440}
1441
1442
1443/* Module init */
1444
1445PyDoc_STRVAR(zipimport_doc,
1446"zipimport provides support for importing Python modules from Zip archives.\n\
1447\n\
1448This module exports three objects:\n\
1449- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001450- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001451 subclass of ImportError, so it can be caught as ImportError, too.\n\
1452- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1453 info dicts, as used in zipimporter._files.\n\
1454\n\
1455It is usually not needed to use the zipimport module explicitly; it is\n\
1456used by the builtin import mechanism for sys.path items that are paths\n\
1457to Zip archives.");
1458
Martin v. Löwis1a214512008-06-11 05:26:20 +00001459static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyModuleDef_HEAD_INIT,
1461 "zipimport",
1462 zipimport_doc,
1463 -1,
1464 NULL,
1465 NULL,
1466 NULL,
1467 NULL,
1468 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001469};
1470
Just van Rossum52e14d62002-12-30 22:08:05 +00001471PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001472PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 if (PyType_Ready(&ZipImporter_Type) < 0)
1477 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 /* Correct directory separator */
1480 zip_searchorder[0].suffix[0] = SEP;
1481 zip_searchorder[1].suffix[0] = SEP;
Just van Rossum52e14d62002-12-30 22:08:05 +00001482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 mod = PyModule_Create(&zipimportmodule);
1484 if (mod == NULL)
1485 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1488 PyExc_ImportError, NULL);
1489 if (ZipImportError == NULL)
1490 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 Py_INCREF(ZipImportError);
1493 if (PyModule_AddObject(mod, "ZipImportError",
1494 ZipImportError) < 0)
1495 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 Py_INCREF(&ZipImporter_Type);
1498 if (PyModule_AddObject(mod, "zipimporter",
1499 (PyObject *)&ZipImporter_Type) < 0)
1500 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 zip_directory_cache = PyDict_New();
1503 if (zip_directory_cache == NULL)
1504 return NULL;
1505 Py_INCREF(zip_directory_cache);
1506 if (PyModule_AddObject(mod, "_zip_directory_cache",
1507 zip_directory_cache) < 0)
1508 return NULL;
1509 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001510}