blob: dceca5e7cd4e3b286ba2db42cdb65573421129d2 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits stored in st_zip_searchorder.type; they may be OR-ed together. */
#define IS_SOURCE   0x0     /* entry is source code (no bit set) */
#define IS_BYTECODE 0x1     /* entry is compiled bytecode */
#define IS_PACKAGE  0x2     /* entry is a package __init__ file */
11
/* One row of the module search table: a file-name suffix and the kind of
   entry that suffix denotes. */
struct st_zip_searchorder {
    /* Suffix appended to the module path.  14 bytes hold the longest
       suffix used in zip_searchorder ("/__init__.pyc") plus its NUL. */
    char suffix[14];
    /* Combination of the IS_SOURCE / IS_BYTECODE / IS_PACKAGE flags. */
    int type;
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000030};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000038 PyObject *archive; /* pathname of the Zip archive,
39 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000040 PyObject *prefix; /* file prefix: "a/sub/directory/",
41 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000042 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000043};
44
Just van Rossum52e14d62002-12-30 22:08:05 +000045static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000046/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000047static PyObject *zip_directory_cache = NULL;
48
49/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000050static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000051static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040052static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000053 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000054
55
56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60 Split the "subdirectory" from the Zip archive path, lookup a matching
61 entry in sys.path_importer_cache, fetch the file directory from there
62 if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010066 PyObject *path, *files, *tmp;
67 PyObject *filename = NULL;
68 Py_ssize_t len, flen;
69#ifdef ALTSEP
70 _Py_IDENTIFIER(replace);
71#endif
Just van Rossum52e14d62002-12-30 22:08:05 +000072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 if (!_PyArg_NoKeywords("zipimporter()", kwds))
74 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000075
Victor Stinner2b8dab72010-08-14 14:54:10 +000076 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010077 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000079
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010080 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020081 return -1;
82
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010083 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (len == 0) {
85 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000086 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 }
Just van Rossum52e14d62002-12-30 22:08:05 +000088
89#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010090 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010091 if (!tmp)
92 goto error;
93 Py_DECREF(path);
94 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000095#endif
96
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010097 filename = path;
98 Py_INCREF(filename);
99 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 for (;;) {
101 struct stat statbuf;
102 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000103
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100104 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100105 if (rv == -2)
106 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 if (rv == 0) {
108 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100109 if (!S_ISREG(statbuf.st_mode))
110 /* it's a not file */
111 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 break;
113 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100114 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100116 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
117 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100119 filename = PyUnicode_Substring(path, 0, flen);
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100120 if (filename == NULL)
121 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100123 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000125 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000127
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100128 if (PyUnicode_READY(filename) < 0)
129 goto error;
130
131 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000132 if (files == NULL) {
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100133 files = read_directory(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000134 if (files == NULL)
135 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100136 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000137 goto error;
138 }
139 else
140 Py_INCREF(files);
141 self->files = files;
142
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100143 /* Transfer reference */
144 self->archive = filename;
145 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000146
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100147 /* Check if there is a prefix directory following the filename. */
148 if (flen != len) {
149 tmp = PyUnicode_Substring(path, flen+1,
150 PyUnicode_GET_LENGTH(path));
151 if (tmp == NULL)
152 goto error;
153 self->prefix = tmp;
154 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100156 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
157 if (tmp == NULL)
158 goto error;
159 Py_DECREF(self->prefix);
160 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 }
162 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000163 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100164 self->prefix = PyUnicode_New(0, 0);
165 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000167
168error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100169 Py_DECREF(path);
170 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000171 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000172}
173
174/* GC support. */
175static int
176zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000178 ZipImporter *self = (ZipImporter *)obj;
179 Py_VISIT(self->files);
180 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000181}
182
183static void
184zipimporter_dealloc(ZipImporter *self)
185{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000186 PyObject_GC_UnTrack(self);
187 Py_XDECREF(self->archive);
188 Py_XDECREF(self->prefix);
189 Py_XDECREF(self->files);
190 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000191}
192
193static PyObject *
194zipimporter_repr(ZipImporter *self)
195{
Victor Stinner028dd972010-08-17 00:04:48 +0000196 if (self->archive == NULL)
197 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200198 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000199 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000200 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 else
Victor Stinner07298a12010-10-18 22:45:54 +0000202 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000203 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000204}
205
206/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400207static PyObject *
208get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000209{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100210 Py_ssize_t len, dot;
211 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200212 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100213 len = PyUnicode_GET_LENGTH(fullname);
214 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
215 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400216 Py_INCREF(fullname);
217 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100218 } else
219 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000220}
221
222/* Given a (sub)modulename, write the potential file path in the
223 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400224 length of the resulting string.
225
226 return self.prefix + name.replace('.', os.sep) */
227static PyObject*
228make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000229{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400230 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200231 Py_UCS4 *p, *buf;
232 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000233
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200234 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
235 p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
236 if (buf == NULL) {
237 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400238 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200239 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000240
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200241 if (!PyUnicode_AsUCS4(prefix, p, len, 0)) {
242 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200243 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200244 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200245 p += PyUnicode_GET_LENGTH(prefix);
246 len -= PyUnicode_GET_LENGTH(prefix);
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200247 if (!PyUnicode_AsUCS4(name, p, len, 1)) {
248 PyMem_Free(buf);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200249 return NULL;
Christian Heimes1b5c76a2012-09-10 02:00:34 +0200250 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400251 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 if (*p == '.')
253 *p = SEP;
254 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200255 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
256 buf, p-buf);
257 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400258 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000259}
260
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* the lookup itself failed */
    MI_NOT_FOUND,   /* no entry of the search table matched */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched an entry flagged IS_PACKAGE */
};
267
Eric V. Smith984b11f2012-05-24 20:21:04 -0400268/* Does this path represent a directory?
269 on error, return < 0
270 if not a dir, return 0
271 if a dir, return 1
272*/
273static int
274check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
275{
276 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700277 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400278
279 /* See if this is a "directory". If so, it's eligible to be part
280 of a namespace package. We test by seeing if the name, with an
281 appended path separator, exists. */
282 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
283 if (dirpath == NULL)
284 return -1;
285 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700286 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400287 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700288 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400289}
290
Just van Rossum52e14d62002-12-30 22:08:05 +0000291/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000292static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400293get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000294{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400295 PyObject *subname;
296 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000298
Victor Stinner965a8a12010-10-18 21:44:33 +0000299 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400300 if (subname == NULL)
301 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000302
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400303 path = make_filename(self->prefix, subname);
304 Py_DECREF(subname);
305 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400309 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
310 if (fullpath == NULL) {
311 Py_DECREF(path);
312 return MI_ERROR;
313 }
314 item = PyDict_GetItem(self->files, fullpath);
315 Py_DECREF(fullpath);
316 if (item != NULL) {
317 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 if (zso->type & IS_PACKAGE)
319 return MI_PACKAGE;
320 else
321 return MI_MODULE;
322 }
323 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400324 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000326}
327
/* Result of find_loader(). */
typedef enum {
    FL_ERROR,           /* the lookup failed */
    FL_NOT_FOUND,       /* neither a module/package nor a directory */
    FL_MODULE_FOUND,    /* a module or regular package was found */
    FL_NS_FOUND         /* a directory was found: possible namespace portion */
} find_loader_result;
334
Eric V. Smith984b11f2012-05-24 20:21:04 -0400335/* The guts of "find_loader" and "find_module". Return values:
336 -1: error
337 0: no loader or namespace portions found
338 1: module/package found
339 2: namespace portion found: *namespace_portion will point to the name
340*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700341static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400342find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
343{
344 enum zi_module_info mi;
345
346 *namespace_portion = NULL;
347
348 mi = get_module_info(self, fullname);
349 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700350 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400351 if (mi == MI_NOT_FOUND) {
352 /* Not a module or regular package. See if this is a directory, and
353 therefore possibly a portion of a namespace package. */
354 int is_dir = check_is_directory(self, self->prefix, fullname);
355 if (is_dir < 0)
356 return -1;
357 if (is_dir) {
358 /* This is possibly a portion of a namespace
359 package. Return the string representing its path,
360 without a trailing separator. */
361 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
362 self->archive, SEP,
363 self->prefix, fullname);
364 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700365 return FL_ERROR;
366 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400367 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700368 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400369 }
370 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700371 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400372}
373
374
Just van Rossum52e14d62002-12-30 22:08:05 +0000375/* Check whether we can satisfy the import of the module named by
376 'fullname'. Return self if we can, None if we can't. */
377static PyObject *
378zipimporter_find_module(PyObject *obj, PyObject *args)
379{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 ZipImporter *self = (ZipImporter *)obj;
381 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400382 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700383 PyObject *namespace_portion = NULL;
384 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000385
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700386 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
387 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000388
Eric V. Smith984b11f2012-05-24 20:21:04 -0400389 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700390 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700391 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700392 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700393 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400394 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700395 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700396 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700397 result = Py_None;
398 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700399 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700400 result = (PyObject *)self;
401 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700403 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700404 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400405}
406
407
408/* Check whether we can satisfy the import of the module named by
409 'fullname', or whether it could be a portion of a namespace
410 package. Return self if we can load it, a string containing the
411 full path if it's a possible namespace portion, None if we
412 can't load it. */
413static PyObject *
414zipimporter_find_loader(PyObject *obj, PyObject *args)
415{
416 ZipImporter *self = (ZipImporter *)obj;
417 PyObject *path = NULL;
418 PyObject *fullname;
419 PyObject *result = NULL;
420 PyObject *namespace_portion = NULL;
421
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700422 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
423 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400424
425 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700426 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700427 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700428 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700429 result = Py_BuildValue("O[]", Py_None);
430 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700431 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700432 result = Py_BuildValue("O[]", self);
433 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700434 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700435 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700436 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400437 return result;
438 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700439 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000440}
441
442/* Load and return the module named by 'fullname'. */
443static PyObject *
444zipimporter_load_module(PyObject *obj, PyObject *args)
445{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000447 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400448 PyObject *fullname;
449 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000451
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400452 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 &fullname))
454 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (PyUnicode_READY(fullname) == -1)
456 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 code = get_module_code(self, fullname, &ispackage, &modpath);
459 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000460 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000461
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400462 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000463 if (mod == NULL)
464 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 /* mod.__loader__ = self */
468 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
469 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 if (ispackage) {
472 /* add __path__ to the module *before* the code gets
473 executed */
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100474 PyObject *pkgpath, *fullpath, *subname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000476
Victor Stinneraf8b7e82013-10-29 01:46:24 +0100477 subname = get_subname(fullname);
478 if (subname == NULL)
479 goto error;
480
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400481 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 self->archive, SEP,
483 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400484 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 if (fullpath == NULL)
486 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000487
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400488 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 if (pkgpath == NULL)
490 goto error;
491 err = PyDict_SetItemString(dict, "__path__", pkgpath);
492 Py_DECREF(pkgpath);
493 if (err != 0)
494 goto error;
495 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400496 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000497 Py_CLEAR(code);
498 if (mod == NULL)
499 goto error;
500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400502 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000503 fullname, modpath);
504 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000506error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000507 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000508 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000510}
511
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000512/* Return a string matching __file__ for the named module */
513static PyObject *
514zipimporter_get_filename(PyObject *obj, PyObject *args)
515{
516 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400517 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000518 int ispackage;
519
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400520 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000521 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000522 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000523
524 /* Deciding the filename requires working out where the code
525 would come from if the module was actually loaded */
526 code = get_module_code(self, fullname, &ispackage, &modpath);
527 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000528 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000529 Py_DECREF(code); /* Only need the path info */
530
Victor Stinner08654e12010-10-18 12:09:02 +0000531 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000532}
533
Just van Rossum52e14d62002-12-30 22:08:05 +0000534/* Return a bool signifying whether the module is a package or not. */
535static PyObject *
536zipimporter_is_package(PyObject *obj, PyObject *args)
537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400539 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000541
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400542 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000543 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 mi = get_module_info(self, fullname);
547 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000548 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400550 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000551 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 }
553 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000554}
555
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200556
Just van Rossum52e14d62002-12-30 22:08:05 +0000557static PyObject *
558zipimporter_get_data(PyObject *obj, PyObject *args)
559{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100561 PyObject *path, *key;
Just van Rossum52e14d62002-12-30 22:08:05 +0000562#ifdef ALTSEP
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100563 _Py_IDENTIFIER(replace);
Just van Rossum52e14d62002-12-30 22:08:05 +0000564#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100566 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000567
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100568 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000570
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200571#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100572 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100573 if (!path)
574 return NULL;
575#else
576 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000577#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100578 if (PyUnicode_READY(path) == -1)
579 goto error;
580
581 path_len = PyUnicode_GET_LENGTH(path);
582
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200583 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100584 path_start = 0;
585 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
586 && PyUnicode_READ_CHAR(path, len) == SEP) {
587 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000589
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100590 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000591 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100592 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000593 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000595 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
596 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100597 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000599 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100600 Py_DECREF(path);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000601 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100602 error:
603 Py_DECREF(path);
604 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000605}
606
607static PyObject *
608zipimporter_get_code(PyObject *obj, PyObject *args)
609{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400611 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000612
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400613 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000617}
618
619static PyObject *
620zipimporter_get_source(PyObject *obj, PyObject *args)
621{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000622 ZipImporter *self = (ZipImporter *)obj;
623 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400624 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000626
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400627 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000631 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000633 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400634 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000635 return NULL;
636 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400637
Victor Stinner965a8a12010-10-18 21:44:33 +0000638 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400639 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000640 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000641
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400642 path = make_filename(self->prefix, subname);
643 Py_DECREF(subname);
644 if (path == NULL)
645 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000646
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400647 if (mi == MI_PACKAGE)
648 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
649 else
650 fullpath = PyUnicode_FromFormat("%U.py", path);
651 Py_DECREF(path);
652 if (fullpath == NULL)
653 return NULL;
654
655 toc_entry = PyDict_GetItem(self->files, fullpath);
656 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000658 PyObject *res, *bytes;
659 bytes = get_data(self->archive, toc_entry);
660 if (bytes == NULL)
661 return NULL;
662 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
663 PyBytes_GET_SIZE(bytes));
664 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 return res;
666 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 /* we have the module, but no source */
669 Py_INCREF(Py_None);
670 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000671}
672
673PyDoc_STRVAR(doc_find_module,
674"find_module(fullname, path=None) -> self or None.\n\
675\n\
676Search for a module specified by 'fullname'. 'fullname' must be the\n\
677fully qualified (dotted) module name. It returns the zipimporter\n\
678instance itself if the module was found, or None if it wasn't.\n\
679The optional 'path' argument is ignored -- it's there for compatibility\n\
680with the importer protocol.");
681
Eric V. Smith984b11f2012-05-24 20:21:04 -0400682PyDoc_STRVAR(doc_find_loader,
683"find_loader(fullname, path=None) -> self, str or None.\n\
684\n\
685Search for a module specified by 'fullname'. 'fullname' must be the\n\
686fully qualified (dotted) module name. It returns the zipimporter\n\
687instance itself if the module was found, a string containing the\n\
688full path name if it's possibly a portion of a namespace package,\n\
689or None otherwise. The optional 'path' argument is ignored -- it's\n\
690 there for compatibility with the importer protocol.");
691
Just van Rossum52e14d62002-12-30 22:08:05 +0000692PyDoc_STRVAR(doc_load_module,
693"load_module(fullname) -> module.\n\
694\n\
695Load the module specified by 'fullname'. 'fullname' must be the\n\
696fully qualified (dotted) module name. It returns the imported\n\
697module, or raises ZipImportError if it wasn't found.");
698
699PyDoc_STRVAR(doc_get_data,
700"get_data(pathname) -> string with file data.\n\
701\n\
702Return the data associated with 'pathname'. Raise IOError if\n\
703the file wasn't found.");
704
705PyDoc_STRVAR(doc_is_package,
706"is_package(fullname) -> bool.\n\
707\n\
708Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000709Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000710
711PyDoc_STRVAR(doc_get_code,
712"get_code(fullname) -> code object.\n\
713\n\
714Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000715if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000716
717PyDoc_STRVAR(doc_get_source,
718"get_source(fullname) -> source string.\n\
719\n\
720Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000721if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000722contain the module, but has no source for it.");
723
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000724
725PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000726"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000727\n\
728Return the filename for the specified module.");
729
Just van Rossum52e14d62002-12-30 22:08:05 +0000730static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 {"find_module", zipimporter_find_module, METH_VARARGS,
732 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400733 {"find_loader", zipimporter_find_loader, METH_VARARGS,
734 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 {"load_module", zipimporter_load_module, METH_VARARGS,
736 doc_load_module},
737 {"get_data", zipimporter_get_data, METH_VARARGS,
738 doc_get_data},
739 {"get_code", zipimporter_get_code, METH_VARARGS,
740 doc_get_code},
741 {"get_source", zipimporter_get_source, METH_VARARGS,
742 doc_get_source},
743 {"get_filename", zipimporter_get_filename, METH_VARARGS,
744 doc_get_filename},
745 {"is_package", zipimporter_is_package, METH_VARARGS,
746 doc_is_package},
747 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000748};
749
750static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
752 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
753 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
754 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000755};
756
757PyDoc_STRVAR(zipimporter_doc,
758"zipimporter(archivepath) -> zipimporter object\n\
759\n\
760Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000761a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
762'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
763valid directory inside the archive.\n\
764\n\
765'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
766archive.\n\
767\n\
768The 'archive' attribute of zipimporter objects contains the name of the\n\
769zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000770
771#define DEFERRED_ADDRESS(ADDR) 0
772
773static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
775 "zipimport.zipimporter",
776 sizeof(ZipImporter),
777 0, /* tp_itemsize */
778 (destructor)zipimporter_dealloc, /* tp_dealloc */
779 0, /* tp_print */
780 0, /* tp_getattr */
781 0, /* tp_setattr */
782 0, /* tp_reserved */
783 (reprfunc)zipimporter_repr, /* tp_repr */
784 0, /* tp_as_number */
785 0, /* tp_as_sequence */
786 0, /* tp_as_mapping */
787 0, /* tp_hash */
788 0, /* tp_call */
789 0, /* tp_str */
790 PyObject_GenericGetAttr, /* tp_getattro */
791 0, /* tp_setattro */
792 0, /* tp_as_buffer */
793 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
794 Py_TPFLAGS_HAVE_GC, /* tp_flags */
795 zipimporter_doc, /* tp_doc */
796 zipimporter_traverse, /* tp_traverse */
797 0, /* tp_clear */
798 0, /* tp_richcompare */
799 0, /* tp_weaklistoffset */
800 0, /* tp_iter */
801 0, /* tp_iternext */
802 zipimporter_methods, /* tp_methods */
803 zipimporter_members, /* tp_members */
804 0, /* tp_getset */
805 0, /* tp_base */
806 0, /* tp_dict */
807 0, /* tp_descr_get */
808 0, /* tp_descr_set */
809 0, /* tp_dictoffset */
810 (initproc)zipimporter_init, /* tp_init */
811 PyType_GenericAlloc, /* tp_alloc */
812 PyType_GenericNew, /* tp_new */
813 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000814};
815
816
817/* implementation */
818
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic: shifting a byte with
   its top bit set into bit 31 of a signed 32-bit long is undefined
   behaviour.  The sign extension is then done arithmetically, so the
   result is the same whether long is 32 or 64 bits wide. */
static long
get_long(unsigned char *buf) {
    unsigned long x;

    x  = (unsigned long)buf[0];
    x |= (unsigned long)buf[1] << 8;
    x |= (unsigned long)buf[2] << 16;
    x |= (unsigned long)buf[3] << 24;

    if (x & 0x80000000UL) {
        /* Negative 32-bit value: 0xFFFFFFFF - x lies in [0, 0x7FFFFFFF],
           so the conversion to long and the negation cannot overflow. */
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }
    return (long)x;
}
835
836/*
837 read_directory(archive) -> files dict (new reference)
838
839 Given a path to a Zip archive, build a dict, mapping file names
840 (local to the archive, using SEP as a separator) to toc entries.
841
842 A toc_entry is a tuple:
843
Victor Stinner08654e12010-10-18 12:09:02 +0000844 (__file__, # value to use for __file__, available for all files,
845 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 compress, # compression kind; 0 for uncompressed
847 data_size, # size of compressed data on disk
848 file_size, # size of decompressed data
849 file_offset, # offset of file header from start of archive
850 time, # mod time of file (in dos format)
851 date, # mod data of file (in dos format)
852 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000853 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000854
855 Directories can be recognized by the trailing SEP in the name,
856 data_size and file_offset are 0.
857*/
858static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400859read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 PyObject *files = NULL;
862 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000863 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800864 short compress, time, date, name_size;
865 long crc, data_size, file_size, header_size;
866 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200867 long l, count;
868 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 char name[MAXPATHLEN + 5];
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200870 char dummy[8]; /* Buffer to read unused header values into */
Victor Stinner2460a432010-08-16 17:54:28 +0000871 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800873 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100874 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000875 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000876 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000877
Victor Stinnerdaf45552013-08-28 00:53:59 +0200878 fp = _Py_fopen_obj(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 if (fp == NULL) {
Victor Stinnerbd206e22011-12-18 21:04:17 +0100880 if (!PyErr_Occurred())
Victor Stinner35734762011-12-18 21:05:22 +0100881 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 return NULL;
883 }
Jesus Cea09bf7a72012-10-03 02:13:05 +0200884
885 if (fseek(fp, -22, SEEK_END) == -1) {
886 fclose(fp);
887 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
888 return NULL;
889 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 header_position = ftell(fp);
891 if (fread(endof_central_dir, 1, 22, fp) != 22) {
892 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400893 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 return NULL;
895 }
896 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
897 /* Bad: End of Central Dir signature */
898 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400899 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 return NULL;
901 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 header_size = get_long((unsigned char *)endof_central_dir + 12);
904 header_offset = get_long((unsigned char *)endof_central_dir + 16);
905 arc_offset = header_position - header_offset - header_size;
906 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 files = PyDict_New();
909 if (files == NULL)
910 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 /* Start of Central Directory */
913 count = 0;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200914 if (fseek(fp, header_offset, 0) == -1)
915 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 for (;;) {
917 PyObject *t;
918 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000919
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200920 /* Start of file header */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 l = PyMarshal_ReadLongFromFile(fp);
Victor Stinner73660af2013-10-29 01:43:44 +0100922 if (l == -1 && PyErr_Occurred())
923 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 if (l != 0x02014B50)
925 break; /* Bad: Central Dir File Header */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200926
927 /* On Windows, calling fseek to skip over the fields we don't use is
928 slower than reading the data into a dummy buffer because fseek flushes
929 stdio's internal buffers. See issue #8745. */
930 if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
931 goto file_error;
932
Victor Stinnerd36c8212010-10-18 12:13:46 +0000933 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000934 compress = PyMarshal_ReadShortFromFile(fp);
935 time = PyMarshal_ReadShortFromFile(fp);
936 date = PyMarshal_ReadShortFromFile(fp);
937 crc = PyMarshal_ReadLongFromFile(fp);
938 data_size = PyMarshal_ReadLongFromFile(fp);
939 file_size = PyMarshal_ReadLongFromFile(fp);
940 name_size = PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200941 header_size = name_size +
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 PyMarshal_ReadShortFromFile(fp) +
943 PyMarshal_ReadShortFromFile(fp);
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200944 if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
945 goto file_error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Victor Stinner73660af2013-10-29 01:43:44 +0100947 if (PyErr_Occurred())
948 goto error;
949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 if (name_size > MAXPATHLEN)
951 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200954 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 *p = (char)getc(fp);
956 if (*p == '/')
957 *p = SEP;
958 p++;
959 }
960 *p = 0; /* Add terminating null byte */
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +0200961 for (; i < header_size; i++) /* Skip the rest of the header */
962 if(getc(fp) == EOF) /* Avoid fseek */
963 goto file_error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000964
Victor Stinner4ee65a92011-01-22 10:30:29 +0000965 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000966 if (flags & 0x0800)
967 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000968 else if (!PyThreadState_GET()->interp->codecs_initialized) {
969 /* During bootstrap, we may need to load the encodings
970 package from a ZIP file. But the cp437 encoding is implemented
971 in Python in the encodings package.
972
973 Break out of this dependency by assuming that the path to
974 the encodings module is ASCII-only. */
975 charset = "ascii";
976 bootstrap = 1;
977 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000978 else
979 charset = "cp437";
980 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000981 if (nameobj == NULL) {
982 if (bootstrap)
983 PyErr_Format(PyExc_NotImplementedError,
984 "bootstrap issue: python%i%i.zip contains non-ASCII "
985 "filenames without the unicode flag",
986 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000987 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000988 }
Stefan Krah000fde92012-08-20 14:14:49 +0200989 if (PyUnicode_READY(nameobj) == -1)
990 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100991 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
992 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000993 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800994 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 file_size, file_offset, time, date, crc);
996 if (t == NULL)
997 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000998 err = PyDict_SetItem(files, nameobj, t);
999 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 Py_DECREF(t);
1001 if (err != 0)
1002 goto error;
1003 count++;
1004 }
1005 fclose(fp);
1006 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001007 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
1008 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 return files;
Serhiy Storchaka0e6b7b52013-02-16 17:43:45 +02001010file_error:
Jesus Cea09bf7a72012-10-03 02:13:05 +02001011 fclose(fp);
1012 Py_XDECREF(files);
1013 Py_XDECREF(nameobj);
1014 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1015 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001016error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 fclose(fp);
1018 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +00001019 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001021}
1022
1023/* Return the zlib.decompress function object, or NULL if zlib couldn't
1024 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +02001025 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001026static PyObject *
1027get_decompress_func(void)
1028{
Victor Stinner4925cde2011-05-20 00:16:09 +02001029 static int importing_zlib = 0;
1030 PyObject *zlib;
1031 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001032 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001033
Victor Stinner4925cde2011-05-20 00:16:09 +02001034 if (importing_zlib != 0)
1035 /* Someone has a zlib.py[co] in their Zip file;
1036 let's avoid a stack overflow. */
1037 return NULL;
1038 importing_zlib = 1;
1039 zlib = PyImport_ImportModuleNoBlock("zlib");
1040 importing_zlib = 0;
1041 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001042 decompress = _PyObject_GetAttrId(zlib,
1043 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001044 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001046 else {
1047 PyErr_Clear();
1048 decompress = NULL;
1049 }
1050 if (Py_VerboseFlag)
1051 PySys_WriteStderr("# zipimport: zlib %s\n",
1052 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001054}
1055
1056/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
1057 data as a new reference. */
1058static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +00001059get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 PyObject *raw_data, *data = NULL, *decompress;
1062 char *buf;
1063 FILE *fp;
1064 int err;
1065 Py_ssize_t bytes_read = 0;
1066 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001067 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 long compress, data_size, file_size, file_offset, bytes_size;
1069 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001070
Victor Stinner60fe8d92010-08-16 23:48:11 +00001071 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 &data_size, &file_size, &file_offset, &time,
1073 &date, &crc)) {
1074 return NULL;
1075 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001076
Victor Stinnerdaf45552013-08-28 00:53:59 +02001077 fp = _Py_fopen_obj(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 if (!fp) {
Victor Stinnerbd206e22011-12-18 21:04:17 +01001079 if (!PyErr_Occurred())
1080 PyErr_Format(PyExc_IOError,
1081 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 return NULL;
1083 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 /* Check to make sure the local file header is correct */
Jesus Cea09bf7a72012-10-03 02:13:05 +02001086 if (fseek(fp, file_offset, 0) == -1) {
1087 fclose(fp);
1088 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1089 return NULL;
1090 }
1091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 l = PyMarshal_ReadLongFromFile(fp);
1093 if (l != 0x04034B50) {
1094 /* Bad: Local File Header */
Victor Stinner73660af2013-10-29 01:43:44 +01001095 if (!PyErr_Occurred())
1096 PyErr_Format(ZipImportError,
1097 "bad local file header in %U",
1098 archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 fclose(fp);
1100 return NULL;
1101 }
Jesus Cea09bf7a72012-10-03 02:13:05 +02001102 if (fseek(fp, file_offset + 26, 0) == -1) {
1103 fclose(fp);
1104 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1105 return NULL;
1106 }
1107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1109 PyMarshal_ReadShortFromFile(fp); /* local header size */
Victor Stinner73660af2013-10-29 01:43:44 +01001110 if (PyErr_Occurred()) {
1111 fclose(fp);
1112 return NULL;
1113 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 bytes_size = compress == 0 ? data_size : data_size + 1;
1117 if (bytes_size == 0)
1118 bytes_size++;
1119 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 if (raw_data == NULL) {
1122 fclose(fp);
1123 return NULL;
1124 }
1125 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 err = fseek(fp, file_offset, 0);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001128 if (err == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 bytes_read = fread(buf, 1, data_size, fp);
Jesus Cea09bf7a72012-10-03 02:13:05 +02001130 } else {
1131 fclose(fp);
1132 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
1133 return NULL;
1134 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 fclose(fp);
1136 if (err || bytes_read != data_size) {
1137 PyErr_SetString(PyExc_IOError,
1138 "zipimport: can't read data");
1139 Py_DECREF(raw_data);
1140 return NULL;
1141 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 if (compress != 0) {
1144 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1145 data_size++;
1146 }
1147 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 if (compress == 0) { /* data is not compressed */
1150 data = PyBytes_FromStringAndSize(buf, data_size);
1151 Py_DECREF(raw_data);
1152 return data;
1153 }
1154
1155 /* Decompress with zlib */
1156 decompress = get_decompress_func();
1157 if (decompress == NULL) {
1158 PyErr_SetString(ZipImportError,
1159 "can't decompress data; "
1160 "zlib not available");
1161 goto error;
1162 }
1163 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001164 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001165error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 Py_DECREF(raw_data);
1167 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001168}
1169
/* Lenient comparison of two modification times.  The mtime stored in
   the archive is less precise than the one stored in a .pyc, so two
   values count as equal when they differ by at most one second.
   Returns 1 if they match under that rule, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    /* dostime only stores even seconds, so be lenient */
    return (delta < 0 ? -delta : delta) <= 1;
}
1182
1183/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1184 and return the code object. Return None if it the magic word doesn't
1185 match (we do this instead of raising an exception as we fall back
1186 to .py if available and we don't want to mask other errors).
1187 Returns a new reference. */
1188static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001189unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 PyObject *code;
1192 char *buf = PyBytes_AsString(data);
1193 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 if (size <= 9) {
1196 PyErr_SetString(ZipImportError,
1197 "bad pyc data");
1198 return NULL;
1199 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1202 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001203 PySys_FormatStderr("# %R has bad magic\n",
1204 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 Py_INCREF(Py_None);
1206 return Py_None; /* signal caller to try alternative */
1207 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1210 mtime)) {
1211 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001212 PySys_FormatStderr("# %R has bad mtime\n",
1213 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_INCREF(Py_None);
1215 return Py_None; /* signal caller to try alternative */
1216 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001217
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001218 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1219 unimportant with zip files. */
1220 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 if (code == NULL)
1222 return NULL;
1223 if (!PyCode_Check(code)) {
1224 Py_DECREF(code);
1225 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001226 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 pathname);
1228 return NULL;
1229 }
1230 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001231}
1232
1233/* Replace any occurances of "\r\n?" in the input string with "\n".
1234 This converts DOS and Mac line endings to Unix line endings.
1235 Also append a trailing "\n" to be compatible with
1236 PyParser_SimpleParseFile(). Returns a new reference. */
1237static PyObject *
1238normalize_line_endings(PyObject *source)
1239{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001240 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 PyObject *fixed_source;
1242 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001243
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001244 p = PyBytes_AsString(source);
1245 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 return PyBytes_FromStringAndSize("\n\0", 2);
1247 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 /* one char extra for trailing \n and one for terminating \0 */
1250 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1251 if (buf == NULL) {
1252 PyErr_SetString(PyExc_MemoryError,
1253 "zipimport: no memory to allocate "
1254 "source buffer");
1255 return NULL;
1256 }
1257 /* replace "\r\n?" by "\n" */
1258 for (q = buf; *p != '\0'; p++) {
1259 if (*p == '\r') {
1260 *q++ = '\n';
1261 if (*(p + 1) == '\n')
1262 p++;
1263 }
1264 else
1265 *q++ = *p;
1266 len++;
1267 }
1268 *q++ = '\n'; /* add trailing \n */
1269 *q = '\0';
1270 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1271 PyMem_Free(buf);
1272 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001273}
1274
1275/* Given a string buffer containing Python source code, compile it
Brett Cannon83358c92013-06-20 21:30:32 -04001276 and return a code object as a new reference. */
Just van Rossum52e14d62002-12-30 22:08:05 +00001277static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001278compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001279{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001280 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001281
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001282 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1283 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001285
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001286 fixed_source = normalize_line_endings(source);
1287 if (fixed_source == NULL) {
1288 Py_DECREF(pathbytes);
1289 return NULL;
1290 }
1291
1292 code = Py_CompileString(PyBytes_AsString(fixed_source),
1293 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001295 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 Py_DECREF(fixed_source);
1297 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001298}
1299
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs hour (bits 11-15), minute (bits 5-10) and seconds/2
   (bits 0-4); dosdate packs years since 1980 (bits 9-15), month 1-12
   (bits 5-8) and day of month (bits 0-4).  The broken-down time is
   converted with mktime(), i.e. it is interpreted as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;
    int sec  = (dostime & 0x1f) * 2;
    int min  = (dostime >> 5) & 0x3f;
    int hour = (dostime >> 11) & 0x1f;
    int mday = dosdate & 0x1f;
    int mon  = ((dosdate >> 5) & 0x0f) - 1;     /* struct tm months are 0-based */
    int year = ((dosdate >> 9) & 0x7f) + 80;    /* struct tm years count from 1900 */

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_year  = year;
    stm.tm_mon   = mon;
    stm.tm_mday  = mday;
    stm.tm_hour  = hour;
    stm.tm_min   = min;
    stm.tm_sec   = sec;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1319
1320/* Given a path to a .pyc or .pyo file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001321 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001322 is available. */
1323static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001324get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001325{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001326 PyObject *toc_entry, *stripped;
1327 time_t mtime;
1328
1329 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001330 if (PyUnicode_READY(path) == -1)
1331 return (time_t)-1;
1332 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1333 PyUnicode_DATA(path),
1334 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001335 if (stripped == NULL)
1336 return (time_t)-1;
1337
1338 toc_entry = PyDict_GetItem(self->files, stripped);
1339 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1341 PyTuple_Size(toc_entry) == 8) {
1342 /* fetch the time stamp of the .py file for comparison
1343 with an embedded pyc time stamp */
1344 int time, date;
1345 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1346 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1347 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001348 } else
1349 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001350 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001351}
1352
1353/* Return the code object for the module named by 'fullname' from the
1354 Zip archive as a new reference. */
1355static PyObject *
1356get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001358{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001359 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001360
Victor Stinner60fe8d92010-08-16 23:48:11 +00001361 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 if (data == NULL)
1363 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001364
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001365 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001366 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001367 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001368 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001369 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 Py_DECREF(data);
1371 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001372}
1373
Ezio Melotti42da6632011-03-15 05:18:48 +02001374/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001375 'fullname'. */
1376static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001377get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001378 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001379{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001380 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001381 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001385 if (subname == NULL)
1386 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001387
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001388 path = make_filename(self->prefix, subname);
1389 Py_DECREF(subname);
1390 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001394 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001395
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001396 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1397 if (fullpath == NULL)
1398 goto exit;
1399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001401 PySys_FormatStderr("# trying %U%c%U\n",
1402 self->archive, (int)SEP, fullpath);
1403 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 if (toc_entry != NULL) {
1405 time_t mtime = 0;
1406 int ispackage = zso->type & IS_PACKAGE;
1407 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001408
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001409 if (isbytecode) {
1410 mtime = get_mtime_of_source(self, fullpath);
1411 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1412 goto exit;
1413 }
1414 }
1415 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 if (p_ispackage != NULL)
1417 *p_ispackage = ispackage;
1418 code = get_code_from_data(self, ispackage,
1419 isbytecode, mtime,
1420 toc_entry);
1421 if (code == Py_None) {
1422 /* bad magic number or non-matching mtime
1423 in byte code, try next */
1424 Py_DECREF(code);
1425 continue;
1426 }
Victor Stinner08654e12010-10-18 12:09:02 +00001427 if (code != NULL && p_modpath != NULL) {
1428 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1429 Py_INCREF(*p_modpath);
1430 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001431 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001433 else
1434 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001436 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1437exit:
1438 Py_DECREF(path);
1439 Py_XDECREF(fullpath);
1440 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001441}
1442
1443
1444/* Module init */
1445
1446PyDoc_STRVAR(zipimport_doc,
1447"zipimport provides support for importing Python modules from Zip archives.\n\
1448\n\
1449This module exports three objects:\n\
1450- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001451- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001452 subclass of ImportError, so it can be caught as ImportError, too.\n\
1453- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1454 info dicts, as used in zipimporter._files.\n\
1455\n\
1456It is usually not needed to use the zipimport module explicitly; it is\n\
1457used by the builtin import mechanism for sys.path items that are paths\n\
1458to Zip archives.");
1459
Martin v. Löwis1a214512008-06-11 05:26:20 +00001460static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 PyModuleDef_HEAD_INIT,
1462 "zipimport",
1463 zipimport_doc,
1464 -1,
1465 NULL,
1466 NULL,
1467 NULL,
1468 NULL,
1469 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001470};
1471
Just van Rossum52e14d62002-12-30 22:08:05 +00001472PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001473PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001474{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001476
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 if (PyType_Ready(&ZipImporter_Type) < 0)
1478 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 /* Correct directory separator */
1481 zip_searchorder[0].suffix[0] = SEP;
1482 zip_searchorder[1].suffix[0] = SEP;
1483 zip_searchorder[2].suffix[0] = SEP;
1484 if (Py_OptimizeFlag) {
1485 /* Reverse *.pyc and *.pyo */
1486 struct st_zip_searchorder tmp;
1487 tmp = zip_searchorder[0];
1488 zip_searchorder[0] = zip_searchorder[1];
1489 zip_searchorder[1] = tmp;
1490 tmp = zip_searchorder[3];
1491 zip_searchorder[3] = zip_searchorder[4];
1492 zip_searchorder[4] = tmp;
1493 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 mod = PyModule_Create(&zipimportmodule);
1496 if (mod == NULL)
1497 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1500 PyExc_ImportError, NULL);
1501 if (ZipImportError == NULL)
1502 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 Py_INCREF(ZipImportError);
1505 if (PyModule_AddObject(mod, "ZipImportError",
1506 ZipImportError) < 0)
1507 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 Py_INCREF(&ZipImporter_Type);
1510 if (PyModule_AddObject(mod, "zipimporter",
1511 (PyObject *)&ZipImporter_Type) < 0)
1512 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 zip_directory_cache = PyDict_New();
1515 if (zip_directory_cache == NULL)
1516 return NULL;
1517 Py_INCREF(zip_directory_cache);
1518 if (PyModule_AddObject(mod, "_zip_directory_cache",
1519 zip_directory_cache) < 0)
1520 return NULL;
1521 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001522}