blob: 603efd6bbaad70e607186448593dce25e9d0a6f0 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the search table: a file-name suffix and the IS_* flags
   describing what an archive entry with that suffix contains. */
struct st_zip_searchorder {
    char suffix[14];    /* longest entry is "/__init__.pyc" plus NUL */
    int type;
};

/* zip_searchorder lists, in lookup order, the suffixes tried when
   searching the Zip archive for a module: package __init__ files come
   first, followed by the non-package .pyc, .pyo and .py entries.
   initzipimport() swaps the .pyc and .pyo entries when running in
   optimized mode, and replaces '/' by SEP.  The table is terminated
   by an entry with an empty suffix. */
static struct st_zip_searchorder zip_searchorder[] = {
    { "/__init__.pyc", IS_PACKAGE | IS_BYTECODE },
    { "/__init__.pyo", IS_PACKAGE | IS_BYTECODE },
    { "/__init__.py",  IS_PACKAGE | IS_SOURCE },
    { ".pyc",          IS_BYTECODE },
    { ".pyo",          IS_BYTECODE },
    { ".py",           IS_SOURCE },
    { "",              0 }
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000038 PyObject *archive; /* pathname of the Zip archive,
39 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000040 PyObject *prefix; /* file prefix: "a/sub/directory/",
41 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000042 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000043};
44
Just van Rossum52e14d62002-12-30 22:08:05 +000045static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000046/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000047static PyObject *zip_directory_cache = NULL;
48
49/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000050static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000051static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040052static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000053 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000054
55
56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60 Split the "subdirectory" from the Zip archive path, lookup a matching
61 entry in sys.path_importer_cache, fetch the file directory from there
62 if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010066 PyObject *path, *files, *tmp;
67 PyObject *filename = NULL;
68 Py_ssize_t len, flen;
69#ifdef ALTSEP
70 _Py_IDENTIFIER(replace);
71#endif
Just van Rossum52e14d62002-12-30 22:08:05 +000072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 if (!_PyArg_NoKeywords("zipimporter()", kwds))
74 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000075
Victor Stinner2b8dab72010-08-14 14:54:10 +000076 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010077 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000079
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010080 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020081 return -1;
82
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010083 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (len == 0) {
85 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000086 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 }
Just van Rossum52e14d62002-12-30 22:08:05 +000088
89#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010090 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010091 if (!tmp)
92 goto error;
93 Py_DECREF(path);
94 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000095#endif
96
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010097 filename = path;
98 Py_INCREF(filename);
99 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 for (;;) {
101 struct stat statbuf;
102 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000103
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100104 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100105 if (rv == -2)
106 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 if (rv == 0) {
108 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100109 if (!S_ISREG(statbuf.st_mode))
110 /* it's a not file */
111 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 break;
113 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100114 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100116 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
117 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100119 filename = PyUnicode_Substring(path, 0, flen);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100121 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000123 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000125
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100126 if (PyUnicode_READY(filename) < 0)
127 goto error;
128
129 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000130 if (files == NULL) {
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100131 files = read_directory(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000132 if (files == NULL)
133 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100134 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000135 goto error;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100141 /* Transfer reference */
142 self->archive = filename;
143 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000144
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100145 /* Check if there is a prefix directory following the filename. */
146 if (flen != len) {
147 tmp = PyUnicode_Substring(path, flen+1,
148 PyUnicode_GET_LENGTH(path));
149 if (tmp == NULL)
150 goto error;
151 self->prefix = tmp;
152 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100154 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
155 if (tmp == NULL)
156 goto error;
157 Py_DECREF(self->prefix);
158 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 }
160 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000161 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100162 self->prefix = PyUnicode_New(0, 0);
163 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000165
166error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100167 Py_DECREF(path);
168 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000169 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000170}
171
172/* GC support. */
173static int
174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 ZipImporter *self = (ZipImporter *)obj;
177 Py_VISIT(self->files);
178 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000179}
180
181static void
182zipimporter_dealloc(ZipImporter *self)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 PyObject_GC_UnTrack(self);
185 Py_XDECREF(self->archive);
186 Py_XDECREF(self->prefix);
187 Py_XDECREF(self->files);
188 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000189}
190
191static PyObject *
192zipimporter_repr(ZipImporter *self)
193{
Victor Stinner028dd972010-08-17 00:04:48 +0000194 if (self->archive == NULL)
195 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000197 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000198 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 else
Victor Stinner07298a12010-10-18 22:45:54 +0000200 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000201 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000202}
203
204/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400205static PyObject *
206get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000207{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100208 Py_ssize_t len, dot;
209 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200210 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100211 len = PyUnicode_GET_LENGTH(fullname);
212 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
213 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400214 Py_INCREF(fullname);
215 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100216 } else
217 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000218}
219
220/* Given a (sub)modulename, write the potential file path in the
221 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400222 length of the resulting string.
223
224 return self.prefix + name.replace('.', os.sep) */
225static PyObject*
226make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000227{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400228 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200229 Py_UCS4 *p, *buf;
230 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000231
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200232 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
233 p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
234 if (buf == NULL) {
235 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400236 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200237 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000238
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200239 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
240 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200241 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200242 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 p += PyUnicode_GET_LENGTH(prefix);
244 len -= PyUnicode_GET_LENGTH(prefix);
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200245 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
246 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200247 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200248 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400249 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (*p == '.')
251 *p = SEP;
252 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200253 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
254 buf, p-buf);
255 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400256 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000257}
258
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* a helper call failed */
    MI_NOT_FOUND,   /* no matching entry in the archive directory */
    MI_MODULE,      /* matched a non-package suffix */
    MI_PACKAGE      /* matched a suffix flagged IS_PACKAGE */
};
265
Eric V. Smith984b11f2012-05-24 20:21:04 -0400266/* Does this path represent a directory?
267 on error, return < 0
268 if not a dir, return 0
269 if a dir, return 1
270*/
271static int
272check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
273{
274 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700275 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400276
277 /* See if this is a "directory". If so, it's eligible to be part
278 of a namespace package. We test by seeing if the name, with an
279 appended path separator, exists. */
280 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
281 if (dirpath == NULL)
282 return -1;
283 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700284 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400285 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700286 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400287}
288
Just van Rossum52e14d62002-12-30 22:08:05 +0000289/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000290static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400291get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000292{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400293 PyObject *subname;
294 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000296
Victor Stinner965a8a12010-10-18 21:44:33 +0000297 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400298 if (subname == NULL)
299 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000300
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400301 path = make_filename(self->prefix, subname);
302 Py_DECREF(subname);
303 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400307 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
308 if (fullpath == NULL) {
309 Py_DECREF(path);
310 return MI_ERROR;
311 }
312 item = PyDict_GetItem(self->files, fullpath);
313 Py_DECREF(fullpath);
314 if (item != NULL) {
315 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 if (zso->type & IS_PACKAGE)
317 return MI_PACKAGE;
318 else
319 return MI_MODULE;
320 }
321 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400322 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000324}
325
/* Result of find_loader(). */
typedef enum {
    FL_ERROR,           /* lookup failed */
    FL_NOT_FOUND,       /* no module and no namespace portion */
    FL_MODULE_FOUND,    /* module or package found */
    FL_NS_FOUND         /* namespace portion found */
} find_loader_result;
332
Eric V. Smith984b11f2012-05-24 20:21:04 -0400333/* The guts of "find_loader" and "find_module". Return values:
334 -1: error
335 0: no loader or namespace portions found
336 1: module/package found
337 2: namespace portion found: *namespace_portion will point to the name
338*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700339static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400340find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
341{
342 enum zi_module_info mi;
343
344 *namespace_portion = NULL;
345
346 mi = get_module_info(self, fullname);
347 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700348 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400349 if (mi == MI_NOT_FOUND) {
350 /* Not a module or regular package. See if this is a directory, and
351 therefore possibly a portion of a namespace package. */
352 int is_dir = check_is_directory(self, self->prefix, fullname);
353 if (is_dir < 0)
354 return -1;
355 if (is_dir) {
356 /* This is possibly a portion of a namespace
357 package. Return the string representing its path,
358 without a trailing separator. */
359 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
360 self->archive, SEP,
361 self->prefix, fullname);
362 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700363 return FL_ERROR;
364 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400365 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700366 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400367 }
368 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700369 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400370}
371
372
Just van Rossum52e14d62002-12-30 22:08:05 +0000373/* Check whether we can satisfy the import of the module named by
374 'fullname'. Return self if we can, None if we can't. */
375static PyObject *
376zipimporter_find_module(PyObject *obj, PyObject *args)
377{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 ZipImporter *self = (ZipImporter *)obj;
379 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400380 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700381 PyObject *namespace_portion = NULL;
382 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000383
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700384 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
385 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Eric V. Smith984b11f2012-05-24 20:21:04 -0400387 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700388 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700389 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700390 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700391 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400392 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700393 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700394 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700395 result = Py_None;
396 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700397 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700398 result = (PyObject *)self;
399 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700401 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700402 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400403}
404
405
406/* Check whether we can satisfy the import of the module named by
407 'fullname', or whether it could be a portion of a namespace
408 package. Return self if we can load it, a string containing the
409 full path if it's a possible namespace portion, None if we
410 can't load it. */
411static PyObject *
412zipimporter_find_loader(PyObject *obj, PyObject *args)
413{
414 ZipImporter *self = (ZipImporter *)obj;
415 PyObject *path = NULL;
416 PyObject *fullname;
417 PyObject *result = NULL;
418 PyObject *namespace_portion = NULL;
419
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700420 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
421 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400422
423 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700424 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700425 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700426 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700427 result = Py_BuildValue("O[]", Py_None);
428 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700429 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700430 result = Py_BuildValue("O[]", self);
431 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700432 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700433 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700434 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400435 return result;
436 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700437 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000438}
439
440/* Load and return the module named by 'fullname'. */
441static PyObject *
442zipimporter_load_module(PyObject *obj, PyObject *args)
443{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000445 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400446 PyObject *fullname;
447 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000449
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400450 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 &fullname))
452 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (PyUnicode_READY(fullname) == -1)
454 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 code = get_module_code(self, fullname, &ispackage, &modpath);
457 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000458 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000459
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400460 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000461 if (mod == NULL)
462 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 /* mod.__loader__ = self */
466 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
467 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000469 if (ispackage) {
470 /* add __path__ to the module *before* the code gets
471 executed */
472 PyObject *pkgpath, *fullpath;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400473 PyObject *subname = get_subname(fullname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000475
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400476 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000477 self->archive, SEP,
478 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400479 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 if (fullpath == NULL)
481 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000482
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400483 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 if (pkgpath == NULL)
485 goto error;
486 err = PyDict_SetItemString(dict, "__path__", pkgpath);
487 Py_DECREF(pkgpath);
488 if (err != 0)
489 goto error;
490 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400491 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000492 Py_CLEAR(code);
493 if (mod == NULL)
494 goto error;
495
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400497 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000498 fullname, modpath);
499 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000501error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000502 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000503 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000505}
506
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000507/* Return a string matching __file__ for the named module */
508static PyObject *
509zipimporter_get_filename(PyObject *obj, PyObject *args)
510{
511 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400512 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000513 int ispackage;
514
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400515 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000516 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000517 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000518
519 /* Deciding the filename requires working out where the code
520 would come from if the module was actually loaded */
521 code = get_module_code(self, fullname, &ispackage, &modpath);
522 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000523 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000524 Py_DECREF(code); /* Only need the path info */
525
Victor Stinner08654e12010-10-18 12:09:02 +0000526 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000527}
528
Just van Rossum52e14d62002-12-30 22:08:05 +0000529/* Return a bool signifying whether the module is a package or not. */
530static PyObject *
531zipimporter_is_package(PyObject *obj, PyObject *args)
532{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400534 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000536
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400537 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000538 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 mi = get_module_info(self, fullname);
542 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000543 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400545 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000546 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 }
548 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000549}
550
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200551
Just van Rossum52e14d62002-12-30 22:08:05 +0000552static PyObject *
553zipimporter_get_data(PyObject *obj, PyObject *args)
554{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100556 PyObject *path, *key;
Just van Rossum52e14d62002-12-30 22:08:05 +0000557#ifdef ALTSEP
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100558 _Py_IDENTIFIER(replace);
Just van Rossum52e14d62002-12-30 22:08:05 +0000559#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100561 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000562
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100563 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000565
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200566#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100567 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100568 if (!path)
569 return NULL;
570#else
571 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000572#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100573 if (PyUnicode_READY(path) == -1)
574 goto error;
575
576 path_len = PyUnicode_GET_LENGTH(path);
577
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200578 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100579 path_start = 0;
580 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
581 && PyUnicode_READ_CHAR(path, len) == SEP) {
582 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000584
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100585 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000586 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100587 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000588 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000590 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
591 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100592 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000594 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100595 Py_DECREF(path);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000596 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100597 error:
598 Py_DECREF(path);
599 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000600}
601
602static PyObject *
603zipimporter_get_code(PyObject *obj, PyObject *args)
604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400606 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000607
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400608 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000609 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000612}
613
614static PyObject *
615zipimporter_get_source(PyObject *obj, PyObject *args)
616{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 ZipImporter *self = (ZipImporter *)obj;
618 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400619 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000621
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400622 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000626 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000628 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400629 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000630 return NULL;
631 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400632
Victor Stinner965a8a12010-10-18 21:44:33 +0000633 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400634 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000635 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000636
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400637 path = make_filename(self->prefix, subname);
638 Py_DECREF(subname);
639 if (path == NULL)
640 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000641
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400642 if (mi == MI_PACKAGE)
643 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
644 else
645 fullpath = PyUnicode_FromFormat("%U.py", path);
646 Py_DECREF(path);
647 if (fullpath == NULL)
648 return NULL;
649
650 toc_entry = PyDict_GetItem(self->files, fullpath);
651 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000653 PyObject *res, *bytes;
654 bytes = get_data(self->archive, toc_entry);
655 if (bytes == NULL)
656 return NULL;
657 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
658 PyBytes_GET_SIZE(bytes));
659 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 return res;
661 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 /* we have the module, but no source */
664 Py_INCREF(Py_None);
665 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000666}
667
668PyDoc_STRVAR(doc_find_module,
669"find_module(fullname, path=None) -> self or None.\n\
670\n\
671Search for a module specified by 'fullname'. 'fullname' must be the\n\
672fully qualified (dotted) module name. It returns the zipimporter\n\
673instance itself if the module was found, or None if it wasn't.\n\
674The optional 'path' argument is ignored -- it's there for compatibility\n\
675with the importer protocol.");
676
Eric V. Smith984b11f2012-05-24 20:21:04 -0400677PyDoc_STRVAR(doc_find_loader,
678"find_loader(fullname, path=None) -> self, str or None.\n\
679\n\
680Search for a module specified by 'fullname'. 'fullname' must be the\n\
681fully qualified (dotted) module name. It returns the zipimporter\n\
682instance itself if the module was found, a string containing the\n\
683full path name if it's possibly a portion of a namespace package,\n\
684or None otherwise. The optional 'path' argument is ignored -- it's\n\
685 there for compatibility with the importer protocol.");
686
Just van Rossum52e14d62002-12-30 22:08:05 +0000687PyDoc_STRVAR(doc_load_module,
688"load_module(fullname) -> module.\n\
689\n\
690Load the module specified by 'fullname'. 'fullname' must be the\n\
691fully qualified (dotted) module name. It returns the imported\n\
692module, or raises ZipImportError if it wasn't found.");
693
694PyDoc_STRVAR(doc_get_data,
695"get_data(pathname) -> string with file data.\n\
696\n\
697Return the data associated with 'pathname'. Raise IOError if\n\
698the file wasn't found.");
699
700PyDoc_STRVAR(doc_is_package,
701"is_package(fullname) -> bool.\n\
702\n\
703Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000704Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000705
706PyDoc_STRVAR(doc_get_code,
707"get_code(fullname) -> code object.\n\
708\n\
709Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000710if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000711
712PyDoc_STRVAR(doc_get_source,
713"get_source(fullname) -> source string.\n\
714\n\
715Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000716if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000717contain the module, but has no source for it.");
718
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000719
720PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000721"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000722\n\
723Return the filename for the specified module.");
724
Just van Rossum52e14d62002-12-30 22:08:05 +0000725static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 {"find_module", zipimporter_find_module, METH_VARARGS,
727 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400728 {"find_loader", zipimporter_find_loader, METH_VARARGS,
729 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 {"load_module", zipimporter_load_module, METH_VARARGS,
731 doc_load_module},
732 {"get_data", zipimporter_get_data, METH_VARARGS,
733 doc_get_data},
734 {"get_code", zipimporter_get_code, METH_VARARGS,
735 doc_get_code},
736 {"get_source", zipimporter_get_source, METH_VARARGS,
737 doc_get_source},
738 {"get_filename", zipimporter_get_filename, METH_VARARGS,
739 doc_get_filename},
740 {"is_package", zipimporter_is_package, METH_VARARGS,
741 doc_is_package},
742 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000743};
744
745static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
747 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
748 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
749 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000750};
751
752PyDoc_STRVAR(zipimporter_doc,
753"zipimporter(archivepath) -> zipimporter object\n\
754\n\
755Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000756a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
757'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
758valid directory inside the archive.\n\
759\n\
760'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
761archive.\n\
762\n\
763The 'archive' attribute of zipimporter objects contains the name of the\n\
764zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000765
/* Stand-in for an address that is not written into the static
   initializer below: the macro discards its argument and expands to 0.
   NOTE(review): presumably the real type pointer is filled in when the
   module is initialized -- confirm against the init function, which is
   not visible from here. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter.  Instances are GC-tracked and
   subclassable (see tp_flags); attribute lookup is the generic one, so
   the methods and members tables fully describe the Python-level API.
   The initializer is positional: entries must stay in slot order. */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",                    /* tp_name */
    sizeof(ZipImporter),                        /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
810
811
812/* implementation */
813
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value (so FF FF FF FF yields -1 whatever the width of long).  This
   partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic: shifting a byte >= 0x80
   into bit 31 of a 32-bit signed long would be undefined behaviour.  The
   sign is then applied without ever leaving the range of long, so no
   width-specific preprocessor branch is needed. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* bits - 2**32, computed as -(2**32 - 1 - bits) - 1 so that every
           intermediate value fits in 32 bits */
        return -(long)(0xFFFFFFFFUL - bits) - 1;
    }
    return (long)bits;
}
830
831/*
832 read_directory(archive) -> files dict (new reference)
833
834 Given a path to a Zip archive, build a dict, mapping file names
835 (local to the archive, using SEP as a separator) to toc entries.
836
837 A toc_entry is a tuple:
838
Victor Stinner08654e12010-10-18 12:09:02 +0000839 (__file__, # value to use for __file__, available for all files,
840 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 compress, # compression kind; 0 for uncompressed
842 data_size, # size of compressed data on disk
843 file_size, # size of decompressed data
844 file_offset, # offset of file header from start of archive
845 time, # mod time of file (in dos format)
846 date, # mod data of file (in dos format)
847 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000848 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000849
850 Directories can be recognized by the trailing SEP in the name,
851 data_size and file_offset are 0.
852*/
853static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400854read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 PyObject *files = NULL;
857 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000858 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800859 short compress, time, date, name_size;
860 long crc, data_size, file_size, header_size;
861 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200862 long l, count;
863 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 char name[MAXPATHLEN + 5];
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200865 char dummy[8]; /* Buffer to read unused header values into */
Victor Stinner2460a432010-08-16 17:54:28 +0000866 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800868 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100869 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000870 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000871 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000872
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400873 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (fp == NULL) {
Victor Stinnerbd206e22011-12-18 21:04:17 +0100875 if (!PyErr_Occurred())
Victor Stinner35734762011-12-18 21:05:22 +0100876 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return NULL;
878 }
Jesus Cea09bf7a72012-10-03 02:13:05 +0200879
880 if (fseek(fp, -22, SEEK_END) == -1) {
881 fclose(fp);
882 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
883 return NULL;
884 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 header_position = ftell(fp);
886 if (fread(endof_central_dir, 1, 22, fp) != 22) {
887 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400888 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 return NULL;
890 }
891 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
892 /* Bad: End of Central Dir signature */
893 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400894 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 return NULL;
896 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 header_size = get_long((unsigned char *)endof_central_dir + 12);
899 header_offset = get_long((unsigned char *)endof_central_dir + 16);
900 arc_offset = header_position - header_offset - header_size;
901 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 files = PyDict_New();
904 if (files == NULL)
905 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 /* Start of Central Directory */
908 count = 0;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200909 if (fseek(fp, header_offset, 0) == -1)
910 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 for (;;) {
912 PyObject *t;
913 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000914
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200915 /* Start of file header */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 l = PyMarshal_ReadLongFromFile(fp);
917 if (l != 0x02014B50)
918 break; /* Bad: Central Dir File Header */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200919
920 /* On Windows, calling fseek to skip over the fields we don't use is
921 slower than reading the data into a dummy buffer because fseek flushes
922 stdio's internal buffers. See issue #8745. */
923 if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
924 goto file_error;
925
Victor Stinnerd36c8212010-10-18 12:13:46 +0000926 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 compress = PyMarshal_ReadShortFromFile(fp);
928 time = PyMarshal_ReadShortFromFile(fp);
929 date = PyMarshal_ReadShortFromFile(fp);
930 crc = PyMarshal_ReadLongFromFile(fp);
931 data_size = PyMarshal_ReadLongFromFile(fp);
932 file_size = PyMarshal_ReadLongFromFile(fp);
933 name_size = PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200934 header_size = name_size +
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 PyMarshal_ReadShortFromFile(fp) +
936 PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200937 if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
938 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
940 if (name_size > MAXPATHLEN)
941 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200944 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 *p = (char)getc(fp);
946 if (*p == '/')
947 *p = SEP;
948 p++;
949 }
950 *p = 0; /* Add terminating null byte */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200951 for (; i < header_size; i++) /* Skip the rest of the header */
952 if(getc(fp) == EOF) /* Avoid fseek */
953 goto file_error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000954
Victor Stinner4ee65a92011-01-22 10:30:29 +0000955 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000956 if (flags & 0x0800)
957 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000958 else if (!PyThreadState_GET()->interp->codecs_initialized) {
959 /* During bootstrap, we may need to load the encodings
960 package from a ZIP file. But the cp437 encoding is implemented
961 in Python in the encodings package.
962
963 Break out of this dependency by assuming that the path to
964 the encodings module is ASCII-only. */
965 charset = "ascii";
966 bootstrap = 1;
967 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000968 else
969 charset = "cp437";
970 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000971 if (nameobj == NULL) {
972 if (bootstrap)
973 PyErr_Format(PyExc_NotImplementedError,
974 "bootstrap issue: python%i%i.zip contains non-ASCII "
975 "filenames without the unicode flag",
976 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000977 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000978 }
Stefan Krah000fde92012-08-20 14:14:49 +0200979 if (PyUnicode_READY(nameobj) == -1)
980 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100981 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
982 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000983 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800984 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 file_size, file_offset, time, date, crc);
986 if (t == NULL)
987 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000988 err = PyDict_SetItem(files, nameobj, t);
989 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 Py_DECREF(t);
991 if (err != 0)
992 goto error;
993 count++;
994 }
995 fclose(fp);
996 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400997 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
998 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 return files;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +02001000file_error:
Jesus Cea09bf7a72012-10-03 02:13:05 +02001001 fclose(fp);
1002 Py_XDECREF(files);
1003 Py_XDECREF(nameobj);
1004 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1005 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001006error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 fclose(fp);
1008 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +00001009 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001011}
1012
1013/* Return the zlib.decompress function object, or NULL if zlib couldn't
1014 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +02001015 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001016static PyObject *
1017get_decompress_func(void)
1018{
Victor Stinner4925cde2011-05-20 00:16:09 +02001019 static int importing_zlib = 0;
1020 PyObject *zlib;
1021 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001022 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001023
Victor Stinner4925cde2011-05-20 00:16:09 +02001024 if (importing_zlib != 0)
1025 /* Someone has a zlib.py[co] in their Zip file;
1026 let's avoid a stack overflow. */
1027 return NULL;
1028 importing_zlib = 1;
1029 zlib = PyImport_ImportModuleNoBlock("zlib");
1030 importing_zlib = 0;
1031 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001032 decompress = _PyObject_GetAttrId(zlib,
1033 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001034 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001036 else {
1037 PyErr_Clear();
1038 decompress = NULL;
1039 }
1040 if (Py_VerboseFlag)
1041 PySys_WriteStderr("# zipimport: zlib %s\n",
1042 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001044}
1045
1046/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
1047 data as a new reference. */
1048static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +00001049get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 PyObject *raw_data, *data = NULL, *decompress;
1052 char *buf;
1053 FILE *fp;
1054 int err;
1055 Py_ssize_t bytes_read = 0;
1056 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001057 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 long compress, data_size, file_size, file_offset, bytes_size;
1059 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001060
Victor Stinner60fe8d92010-08-16 23:48:11 +00001061 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 &data_size, &file_size, &file_offset, &time,
1063 &date, &crc)) {
1064 return NULL;
1065 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001066
Victor Stinner60fe8d92010-08-16 23:48:11 +00001067 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 if (!fp) {
Victor Stinnerbd206e22011-12-18 21:04:17 +01001069 if (!PyErr_Occurred())
1070 PyErr_Format(PyExc_IOError,
1071 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 return NULL;
1073 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 /* Check to make sure the local file header is correct */
Jesus Cea09bf7a72012-10-03 02:13:05 +02001076 if (fseek(fp, file_offset, 0) == -1) {
1077 fclose(fp);
1078 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1079 return NULL;
1080 }
1081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 l = PyMarshal_ReadLongFromFile(fp);
1083 if (l != 0x04034B50) {
1084 /* Bad: Local File Header */
1085 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +00001086 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 archive);
1088 fclose(fp);
1089 return NULL;
1090 }
Jesus Cea09bf7a72012-10-03 02:13:05 +02001091 if (fseek(fp, file_offset + 26, 0) == -1) {
1092 fclose(fp);
1093 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1094 return NULL;
1095 }
1096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1098 PyMarshal_ReadShortFromFile(fp); /* local header size */
1099 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 bytes_size = compress == 0 ? data_size : data_size + 1;
1102 if (bytes_size == 0)
1103 bytes_size++;
1104 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (raw_data == NULL) {
1107 fclose(fp);
1108 return NULL;
1109 }
1110 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 err = fseek(fp, file_offset, 0);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001113 if (err == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 bytes_read = fread(buf, 1, data_size, fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001115 } else {
1116 fclose(fp);
1117 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1118 return NULL;
1119 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 fclose(fp);
1121 if (err || bytes_read != data_size) {
1122 PyErr_SetString(PyExc_IOError,
1123 "zipimport: can't read data");
1124 Py_DECREF(raw_data);
1125 return NULL;
1126 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 if (compress != 0) {
1129 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1130 data_size++;
1131 }
1132 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 if (compress == 0) { /* data is not compressed */
1135 data = PyBytes_FromStringAndSize(buf, data_size);
1136 Py_DECREF(raw_data);
1137 return data;
1138 }
1139
1140 /* Decompress with zlib */
1141 decompress = get_decompress_func();
1142 if (decompress == NULL) {
1143 PyErr_SetString(ZipImportError,
1144 "can't decompress data; "
1145 "zlib not available");
1146 goto error;
1147 }
1148 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001149 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001150error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 Py_DECREF(raw_data);
1152 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001153}
1154
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 when t1 and t2 differ by no more than one second, else 0.
   The operands are ordered before subtracting, so the result is also
   right where time_t is an unsigned type (a "d < 0" test after the
   subtraction could never fire there). */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 < t2) ? (t2 - t1) : (t1 - t2);

    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
1167
1168/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1169 and return the code object. Return None if it the magic word doesn't
1170 match (we do this instead of raising an exception as we fall back
1171 to .py if available and we don't want to mask other errors).
1172 Returns a new reference. */
1173static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001174unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 PyObject *code;
1177 char *buf = PyBytes_AsString(data);
1178 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 if (size <= 9) {
1181 PyErr_SetString(ZipImportError,
1182 "bad pyc data");
1183 return NULL;
1184 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1187 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001188 PySys_FormatStderr("# %R has bad magic\n",
1189 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 Py_INCREF(Py_None);
1191 return Py_None; /* signal caller to try alternative */
1192 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1195 mtime)) {
1196 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001197 PySys_FormatStderr("# %R has bad mtime\n",
1198 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 Py_INCREF(Py_None);
1200 return Py_None; /* signal caller to try alternative */
1201 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001202
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001203 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1204 unimportant with zip files. */
1205 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (code == NULL)
1207 return NULL;
1208 if (!PyCode_Check(code)) {
1209 Py_DECREF(code);
1210 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001211 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 pathname);
1213 return NULL;
1214 }
1215 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001216}
1217
1218/* Replace any occurances of "\r\n?" in the input string with "\n".
1219 This converts DOS and Mac line endings to Unix line endings.
1220 Also append a trailing "\n" to be compatible with
1221 PyParser_SimpleParseFile(). Returns a new reference. */
1222static PyObject *
1223normalize_line_endings(PyObject *source)
1224{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001225 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 PyObject *fixed_source;
1227 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001228
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001229 p = PyBytes_AsString(source);
1230 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 return PyBytes_FromStringAndSize("\n\0", 2);
1232 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 /* one char extra for trailing \n and one for terminating \0 */
1235 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1236 if (buf == NULL) {
1237 PyErr_SetString(PyExc_MemoryError,
1238 "zipimport: no memory to allocate "
1239 "source buffer");
1240 return NULL;
1241 }
1242 /* replace "\r\n?" by "\n" */
1243 for (q = buf; *p != '\0'; p++) {
1244 if (*p == '\r') {
1245 *q++ = '\n';
1246 if (*(p + 1) == '\n')
1247 p++;
1248 }
1249 else
1250 *q++ = *p;
1251 len++;
1252 }
1253 *q++ = '\n'; /* add trailing \n */
1254 *q = '\0';
1255 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1256 PyMem_Free(buf);
1257 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001258}
1259
1260/* Given a string buffer containing Python source code, compile it
Brett Cannon83358c92013-06-20 21:30:32 -04001261 and return a code object as a new reference. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001262static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001263compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001264{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001265 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001266
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001267 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1268 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001270
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001271 fixed_source = normalize_line_endings(source);
1272 if (fixed_source == NULL) {
1273 Py_DECREF(pathbytes);
1274 return NULL;
1275 }
1276
1277 code = Py_CompileString(PyBytes_AsString(fixed_source),
1278 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001280 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 Py_DECREF(fixed_source);
1282 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001283}
1284
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   The packed DOS fields are unpacked into a struct tm and converted
   with mktime(), i.e. interpreted as local time with DST left for the
   C library to determine. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset((void *) &fields, '\0', sizeof(fields));

    /* date: bits 0-4 day of month, 5-8 month (1-based), 9-15 years
       since 1980 (tm_year counts from 1900, hence the + 80) */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    /* time: bits 0-4 seconds / 2, 5-10 minutes, 11-15 hours */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min = (dostime >> 5) & 0x3f;
    fields.tm_sec = (dostime & 0x1f) * 2;

    fields.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&fields);
}
1304
1305/* Given a path to a .pyc or .pyo file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001306 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001307 is available. */
1308static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001309get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001310{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001311 PyObject *toc_entry, *stripped;
1312 time_t mtime;
1313
1314 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001315 if (PyUnicode_READY(path) == -1)
1316 return (time_t)-1;
1317 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1318 PyUnicode_DATA(path),
1319 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001320 if (stripped == NULL)
1321 return (time_t)-1;
1322
1323 toc_entry = PyDict_GetItem(self->files, stripped);
1324 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1326 PyTuple_Size(toc_entry) == 8) {
1327 /* fetch the time stamp of the .py file for comparison
1328 with an embedded pyc time stamp */
1329 int time, date;
1330 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1331 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1332 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001333 } else
1334 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001336}
1337
1338/* Return the code object for the module named by 'fullname' from the
1339 Zip archive as a new reference. */
1340static PyObject *
1341get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001343{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001344 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001345
Victor Stinner60fe8d92010-08-16 23:48:11 +00001346 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (data == NULL)
1348 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001349
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001350 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001351 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001352 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001353 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001354 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 Py_DECREF(data);
1356 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001357}
1358
Ezio Melotti42da6632011-03-15 05:18:48 +02001359/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001360 'fullname'. */
1361static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001362get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001363 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001364{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001365 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001366 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001370 if (subname == NULL)
1371 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001372
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001373 path = make_filename(self->prefix, subname);
1374 Py_DECREF(subname);
1375 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001379 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001380
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001381 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1382 if (fullpath == NULL)
1383 goto exit;
1384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001386 PySys_FormatStderr("# trying %U%c%U\n",
1387 self->archive, (int)SEP, fullpath);
1388 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (toc_entry != NULL) {
1390 time_t mtime = 0;
1391 int ispackage = zso->type & IS_PACKAGE;
1392 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001393
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001394 if (isbytecode) {
1395 mtime = get_mtime_of_source(self, fullpath);
1396 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1397 goto exit;
1398 }
1399 }
1400 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (p_ispackage != NULL)
1402 *p_ispackage = ispackage;
1403 code = get_code_from_data(self, ispackage,
1404 isbytecode, mtime,
1405 toc_entry);
1406 if (code == Py_None) {
1407 /* bad magic number or non-matching mtime
1408 in byte code, try next */
1409 Py_DECREF(code);
1410 continue;
1411 }
Victor Stinner08654e12010-10-18 12:09:02 +00001412 if (code != NULL && p_modpath != NULL) {
1413 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1414 Py_INCREF(*p_modpath);
1415 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001416 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001418 else
1419 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001421 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1422exit:
1423 Py_DECREF(path);
1424 Py_XDECREF(fullpath);
1425 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001426}
1427
1428
1429/* Module init */
1430
1431PyDoc_STRVAR(zipimport_doc,
1432"zipimport provides support for importing Python modules from Zip archives.\n\
1433\n\
1434This module exports three objects:\n\
1435- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001436- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001437 subclass of ImportError, so it can be caught as ImportError, too.\n\
1438- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1439 info dicts, as used in zipimporter._files.\n\
1440\n\
1441It is usually not needed to use the zipimport module explicitly; it is\n\
1442used by the builtin import mechanism for sys.path items that are paths\n\
1443to Zip archives.");
1444
Martin v. Löwis1a214512008-06-11 05:26:20 +00001445static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 PyModuleDef_HEAD_INIT,
1447 "zipimport",
1448 zipimport_doc,
1449 -1,
1450 NULL,
1451 NULL,
1452 NULL,
1453 NULL,
1454 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001455};
1456
Just van Rossum52e14d62002-12-30 22:08:05 +00001457PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001458PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 if (PyType_Ready(&ZipImporter_Type) < 0)
1463 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 /* Correct directory separator */
1466 zip_searchorder[0].suffix[0] = SEP;
1467 zip_searchorder[1].suffix[0] = SEP;
1468 zip_searchorder[2].suffix[0] = SEP;
1469 if (Py_OptimizeFlag) {
1470 /* Reverse *.pyc and *.pyo */
1471 struct st_zip_searchorder tmp;
1472 tmp = zip_searchorder[0];
1473 zip_searchorder[0] = zip_searchorder[1];
1474 zip_searchorder[1] = tmp;
1475 tmp = zip_searchorder[3];
1476 zip_searchorder[3] = zip_searchorder[4];
1477 zip_searchorder[4] = tmp;
1478 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 mod = PyModule_Create(&zipimportmodule);
1481 if (mod == NULL)
1482 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1485 PyExc_ImportError, NULL);
1486 if (ZipImportError == NULL)
1487 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 Py_INCREF(ZipImportError);
1490 if (PyModule_AddObject(mod, "ZipImportError",
1491 ZipImportError) < 0)
1492 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 Py_INCREF(&ZipImporter_Type);
1495 if (PyModule_AddObject(mod, "zipimporter",
1496 (PyObject *)&ZipImporter_Type) < 0)
1497 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 zip_directory_cache = PyDict_New();
1500 if (zip_directory_cache == NULL)
1501 return NULL;
1502 Py_INCREF(zip_directory_cache);
1503 if (PyModule_AddObject(mod, "_zip_directory_cache",
1504 zip_directory_cache) < 0)
1505 return NULL;
1506 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001507}