blob: 12bfe233fc57c79364400f6635b38e3abdc8bc12 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags describing the kind of archive entry a suffix stands for. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix together with its IS_* flags. */
struct st_zip_searchorder {
    char suffix[14];    /* longest entry is "/__init__.pyc" plus the NUL */
    int type;           /* bitwise OR of IS_* flags */
};

/* zip_searchorder lists, in priority order, the suffixes tried when
   looking for a module inside the Zip archive: the package __init__
   files come first, followed by the plain (non-package) .pyc, .pyo and
   .py entries.  The entry with an empty suffix terminates the table.
   The .pyc and .pyo entries are swapped by initzipimport() if we run in
   optimized mode.  Also, '/' is replaced by SEP there. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.pyo", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.py",  IS_SOURCE | IS_PACKAGE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
Victor Stinner9e40fad2010-10-18 22:34:46 +000038 PyObject *archive; /* pathname of the Zip archive,
39 decoded from the filesystem encoding */
Victor Stinner72f767e2010-10-18 11:44:21 +000040 PyObject *prefix; /* file prefix: "a/sub/directory/",
41 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000042 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000043};
44
Just van Rossum52e14d62002-12-30 22:08:05 +000045static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000046/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000047static PyObject *zip_directory_cache = NULL;
48
49/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000050static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000051static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Victor Stinnerf6b563a2011-03-14 20:46:50 -040052static PyObject *get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000053 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000054
55
56#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
57
58
59/* zipimporter.__init__
60 Split the "subdirectory" from the Zip archive path, lookup a matching
61 entry in sys.path_importer_cache, fetch the file directory from there
62 if found, or else read it from the archive. */
63static int
64zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
65{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010066 PyObject *path, *files, *tmp;
67 PyObject *filename = NULL;
68 Py_ssize_t len, flen;
69#ifdef ALTSEP
70 _Py_IDENTIFIER(replace);
71#endif
Just van Rossum52e14d62002-12-30 22:08:05 +000072
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 if (!_PyArg_NoKeywords("zipimporter()", kwds))
74 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000075
Victor Stinner2b8dab72010-08-14 14:54:10 +000076 if (!PyArg_ParseTuple(args, "O&:zipimporter",
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010077 PyUnicode_FSDecoder, &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000079
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010080 if (PyUnicode_READY(path) == -1)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +020081 return -1;
82
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010083 len = PyUnicode_GET_LENGTH(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 if (len == 0) {
85 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000086 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 }
Just van Rossum52e14d62002-12-30 22:08:05 +000088
89#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +010090 tmp = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010091 if (!tmp)
92 goto error;
93 Py_DECREF(path);
94 path = tmp;
Just van Rossum52e14d62002-12-30 22:08:05 +000095#endif
96
Martin v. Löwisa72e78b2011-10-31 08:33:37 +010097 filename = path;
98 Py_INCREF(filename);
99 flen = len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 for (;;) {
101 struct stat statbuf;
102 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000103
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100104 rv = _Py_stat(filename, &statbuf);
Victor Stinnerbd0850b2011-12-18 20:47:30 +0100105 if (rv == -2)
106 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 if (rv == 0) {
108 /* it exists */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100109 if (!S_ISREG(statbuf.st_mode))
110 /* it's a not file */
111 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 break;
113 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100114 Py_CLEAR(filename);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 /* back up one path element */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100116 flen = PyUnicode_FindChar(path, SEP, 0, flen, -1);
117 if (flen == -1)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 break;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100119 filename = PyUnicode_Substring(path, 0, flen);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 }
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100121 if (filename == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000123 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000125
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100126 if (PyUnicode_READY(filename) < 0)
127 goto error;
128
129 files = PyDict_GetItem(zip_directory_cache, filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000130 if (files == NULL) {
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100131 files = read_directory(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000132 if (files == NULL)
133 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100134 if (PyDict_SetItem(zip_directory_cache, filename, files) != 0)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000135 goto error;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100141 /* Transfer reference */
142 self->archive = filename;
143 filename = NULL;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000144
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100145 /* Check if there is a prefix directory following the filename. */
146 if (flen != len) {
147 tmp = PyUnicode_Substring(path, flen+1,
148 PyUnicode_GET_LENGTH(path));
149 if (tmp == NULL)
150 goto error;
151 self->prefix = tmp;
152 if (PyUnicode_READ_CHAR(path, len-1) != SEP) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000153 /* add trailing SEP */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100154 tmp = PyUnicode_FromFormat("%U%c", self->prefix, SEP);
155 if (tmp == NULL)
156 goto error;
157 Py_DECREF(self->prefix);
158 self->prefix = tmp;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 }
160 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000161 else
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100162 self->prefix = PyUnicode_New(0, 0);
163 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000165
166error:
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100167 Py_DECREF(path);
168 Py_XDECREF(filename);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000169 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000170}
171
172/* GC support. */
173static int
174zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 ZipImporter *self = (ZipImporter *)obj;
177 Py_VISIT(self->files);
178 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000179}
180
181static void
182zipimporter_dealloc(ZipImporter *self)
183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 PyObject_GC_UnTrack(self);
185 Py_XDECREF(self->archive);
186 Py_XDECREF(self->prefix);
187 Py_XDECREF(self->files);
188 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000189}
190
191static PyObject *
192zipimporter_repr(ZipImporter *self)
193{
Victor Stinner028dd972010-08-17 00:04:48 +0000194 if (self->archive == NULL)
195 return PyUnicode_FromString("<zipimporter object \"???\">");
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200196 else if (self->prefix != NULL && PyUnicode_GET_LENGTH(self->prefix) != 0)
Victor Stinner07298a12010-10-18 22:45:54 +0000197 return PyUnicode_FromFormat("<zipimporter object \"%U%c%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000198 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 else
Victor Stinner07298a12010-10-18 22:45:54 +0000200 return PyUnicode_FromFormat("<zipimporter object \"%U\">",
Victor Stinner028dd972010-08-17 00:04:48 +0000201 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000202}
203
204/* return fullname.split(".")[-1] */
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400205static PyObject *
206get_subname(PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000207{
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100208 Py_ssize_t len, dot;
209 if (PyUnicode_READY(fullname) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200210 return NULL;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100211 len = PyUnicode_GET_LENGTH(fullname);
212 dot = PyUnicode_FindChar(fullname, '.', 0, len, -1);
213 if (dot == -1) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400214 Py_INCREF(fullname);
215 return fullname;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100216 } else
217 return PyUnicode_Substring(fullname, dot+1, len);
Just van Rossum52e14d62002-12-30 22:08:05 +0000218}
219
220/* Given a (sub)modulename, write the potential file path in the
221 archive (without extension) to the path buffer. Return the
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400222 length of the resulting string.
223
224 return self.prefix + name.replace('.', os.sep) */
225static PyObject*
226make_filename(PyObject *prefix, PyObject *name)
Just van Rossum52e14d62002-12-30 22:08:05 +0000227{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400228 PyObject *pathobj;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200229 Py_UCS4 *p, *buf;
230 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000231
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200232 len = PyUnicode_GET_LENGTH(prefix) + PyUnicode_GET_LENGTH(name) + 1;
233 p = buf = PyMem_Malloc(sizeof(Py_UCS4) * len);
234 if (buf == NULL) {
235 PyErr_NoMemory();
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400236 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200237 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000238
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200239 if (!PyUnicode_AsUCS4(prefix, p, len, 0))
240 return NULL;
241 p += PyUnicode_GET_LENGTH(prefix);
242 len -= PyUnicode_GET_LENGTH(prefix);
243 if (!PyUnicode_AsUCS4(name, p, len, 1))
244 return NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400245 for (; *p; p++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 if (*p == '.')
247 *p = SEP;
248 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200249 pathobj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
250 buf, p-buf);
251 PyMem_Free(buf);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400252 return pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000253}
254
/* Outcome of get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* the lookup itself failed */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found a plain module */
    MI_PACKAGE = 3      /* found a package __init__ */
};
261
Eric V. Smith984b11f2012-05-24 20:21:04 -0400262/* Does this path represent a directory?
263 on error, return < 0
264 if not a dir, return 0
265 if a dir, return 1
266*/
267static int
268check_is_directory(ZipImporter *self, PyObject* prefix, PyObject *path)
269{
270 PyObject *dirpath;
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700271 int res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400272
273 /* See if this is a "directory". If so, it's eligible to be part
274 of a namespace package. We test by seeing if the name, with an
275 appended path separator, exists. */
276 dirpath = PyUnicode_FromFormat("%U%U%c", prefix, path, SEP);
277 if (dirpath == NULL)
278 return -1;
279 /* If dirpath is present in self->files, we have a directory. */
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700280 res = PyDict_Contains(self->files, dirpath);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400281 Py_DECREF(dirpath);
Benjamin Peterson18eac4a2012-05-25 00:24:42 -0700282 return res;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400283}
284
Just van Rossum52e14d62002-12-30 22:08:05 +0000285/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000286static enum zi_module_info
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400287get_module_info(ZipImporter *self, PyObject *fullname)
Just van Rossum52e14d62002-12-30 22:08:05 +0000288{
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400289 PyObject *subname;
290 PyObject *path, *fullpath, *item;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000292
Victor Stinner965a8a12010-10-18 21:44:33 +0000293 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400294 if (subname == NULL)
295 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000296
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400297 path = make_filename(self->prefix, subname);
298 Py_DECREF(subname);
299 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400303 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
304 if (fullpath == NULL) {
305 Py_DECREF(path);
306 return MI_ERROR;
307 }
308 item = PyDict_GetItem(self->files, fullpath);
309 Py_DECREF(fullpath);
310 if (item != NULL) {
311 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 if (zso->type & IS_PACKAGE)
313 return MI_PACKAGE;
314 else
315 return MI_MODULE;
316 }
317 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400318 Py_DECREF(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000320}
321
/* Outcome of find_loader(). */
typedef enum {
    FL_ERROR = 0,         /* the lookup itself failed */
    FL_NOT_FOUND = 1,     /* neither a module nor a namespace portion */
    FL_MODULE_FOUND = 2,  /* a module or package was found */
    FL_NS_FOUND = 3       /* a possible namespace package portion */
} find_loader_result;
328
Eric V. Smith984b11f2012-05-24 20:21:04 -0400329/* The guts of "find_loader" and "find_module". Return values:
330 -1: error
331 0: no loader or namespace portions found
332 1: module/package found
333 2: namespace portion found: *namespace_portion will point to the name
334*/
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700335static find_loader_result
Eric V. Smith984b11f2012-05-24 20:21:04 -0400336find_loader(ZipImporter *self, PyObject *fullname, PyObject **namespace_portion)
337{
338 enum zi_module_info mi;
339
340 *namespace_portion = NULL;
341
342 mi = get_module_info(self, fullname);
343 if (mi == MI_ERROR)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700344 return FL_ERROR;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400345 if (mi == MI_NOT_FOUND) {
346 /* Not a module or regular package. See if this is a directory, and
347 therefore possibly a portion of a namespace package. */
348 int is_dir = check_is_directory(self, self->prefix, fullname);
349 if (is_dir < 0)
350 return -1;
351 if (is_dir) {
352 /* This is possibly a portion of a namespace
353 package. Return the string representing its path,
354 without a trailing separator. */
355 *namespace_portion = PyUnicode_FromFormat("%U%c%U%U",
356 self->archive, SEP,
357 self->prefix, fullname);
358 if (*namespace_portion == NULL)
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700359 return FL_ERROR;
360 return FL_NS_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400361 }
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700362 return FL_NOT_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400363 }
364 /* This is a module or package. */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700365 return FL_MODULE_FOUND;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400366}
367
368
Just van Rossum52e14d62002-12-30 22:08:05 +0000369/* Check whether we can satisfy the import of the module named by
370 'fullname'. Return self if we can, None if we can't. */
371static PyObject *
372zipimporter_find_module(PyObject *obj, PyObject *args)
373{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 ZipImporter *self = (ZipImporter *)obj;
375 PyObject *path = NULL;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400376 PyObject *fullname;
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700377 PyObject *namespace_portion = NULL;
378 PyObject *result = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000379
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700380 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
381 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000382
Eric V. Smith984b11f2012-05-24 20:21:04 -0400383 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700384 case FL_ERROR:
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700385 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700386 case FL_NS_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700387 /* A namespace portion is not allowed via find_module, so return None. */
Eric V. Smith984b11f2012-05-24 20:21:04 -0400388 Py_DECREF(namespace_portion);
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700389 /* FALL THROUGH */
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700390 case FL_NOT_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700391 result = Py_None;
392 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700393 case FL_MODULE_FOUND:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700394 result = (PyObject *)self;
395 break;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 }
Benjamin Petersona6a7a1a2012-05-25 00:22:04 -0700397 Py_INCREF(result);
Benjamin Peterson2d12e142012-05-25 00:19:40 -0700398 return result;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400399}
400
401
402/* Check whether we can satisfy the import of the module named by
403 'fullname', or whether it could be a portion of a namespace
404 package. Return self if we can load it, a string containing the
405 full path if it's a possible namespace portion, None if we
406 can't load it. */
407static PyObject *
408zipimporter_find_loader(PyObject *obj, PyObject *args)
409{
410 ZipImporter *self = (ZipImporter *)obj;
411 PyObject *path = NULL;
412 PyObject *fullname;
413 PyObject *result = NULL;
414 PyObject *namespace_portion = NULL;
415
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700416 if (!PyArg_ParseTuple(args, "U|O:zipimporter.find_module", &fullname, &path))
417 return NULL;
Eric V. Smith984b11f2012-05-24 20:21:04 -0400418
419 switch (find_loader(self, fullname, &namespace_portion)) {
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700420 case FL_ERROR:
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700421 return NULL;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700422 case FL_NOT_FOUND: /* Not found, return (None, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700423 result = Py_BuildValue("O[]", Py_None);
424 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700425 case FL_MODULE_FOUND: /* Return (self, []) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700426 result = Py_BuildValue("O[]", self);
427 break;
Benjamin Peterson46c214d2012-05-25 10:22:29 -0700428 case FL_NS_FOUND: /* Return (None, [namespace_portion]) */
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700429 result = Py_BuildValue("O[O]", Py_None, namespace_portion);
Benjamin Peterson209e04c2012-05-24 22:35:39 -0700430 Py_DECREF(namespace_portion);
Eric V. Smith984b11f2012-05-24 20:21:04 -0400431 return result;
432 }
Benjamin Peterson5ed7bd72012-05-24 22:54:15 -0700433 return result;
Just van Rossum52e14d62002-12-30 22:08:05 +0000434}
435
436/* Load and return the module named by 'fullname'. */
437static PyObject *
438zipimporter_load_module(PyObject *obj, PyObject *args)
439{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000441 PyObject *code = NULL, *mod, *dict;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400442 PyObject *fullname;
443 PyObject *modpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000445
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400446 if (!PyArg_ParseTuple(args, "U:zipimporter.load_module",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 &fullname))
448 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 if (PyUnicode_READY(fullname) == -1)
450 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 code = get_module_code(self, fullname, &ispackage, &modpath);
453 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000454 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000455
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400456 mod = PyImport_AddModuleObject(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000457 if (mod == NULL)
458 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 /* mod.__loader__ = self */
462 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
463 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 if (ispackage) {
466 /* add __path__ to the module *before* the code gets
467 executed */
468 PyObject *pkgpath, *fullpath;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400469 PyObject *subname = get_subname(fullname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000471
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400472 fullpath = PyUnicode_FromFormat("%U%c%U%U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 self->archive, SEP,
474 self->prefix, subname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400475 Py_DECREF(subname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 if (fullpath == NULL)
477 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000478
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400479 pkgpath = Py_BuildValue("[N]", fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 if (pkgpath == NULL)
481 goto error;
482 err = PyDict_SetItemString(dict, "__path__", pkgpath);
483 Py_DECREF(pkgpath);
484 if (err != 0)
485 goto error;
486 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400487 mod = PyImport_ExecCodeModuleObject(fullname, code, modpath, NULL);
Victor Stinner26fabe12010-10-18 12:03:25 +0000488 Py_CLEAR(code);
489 if (mod == NULL)
490 goto error;
491
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000492 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400493 PySys_FormatStderr("import %U # loaded from Zip %U\n",
Victor Stinner08654e12010-10-18 12:09:02 +0000494 fullname, modpath);
495 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000497error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000498 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000499 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000501}
502
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000503/* Return a string matching __file__ for the named module */
504static PyObject *
505zipimporter_get_filename(PyObject *obj, PyObject *args)
506{
507 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400508 PyObject *fullname, *code, *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000509 int ispackage;
510
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400511 if (!PyArg_ParseTuple(args, "U:zipimporter.get_filename",
Victor Stinner9e40fad2010-10-18 22:34:46 +0000512 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000513 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000514
515 /* Deciding the filename requires working out where the code
516 would come from if the module was actually loaded */
517 code = get_module_code(self, fullname, &ispackage, &modpath);
518 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000519 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000520 Py_DECREF(code); /* Only need the path info */
521
Victor Stinner08654e12010-10-18 12:09:02 +0000522 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000523}
524
Just van Rossum52e14d62002-12-30 22:08:05 +0000525/* Return a bool signifying whether the module is a package or not. */
526static PyObject *
527zipimporter_is_package(PyObject *obj, PyObject *args)
528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400530 PyObject *fullname;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000532
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400533 if (!PyArg_ParseTuple(args, "U:zipimporter.is_package",
Victor Stinner965a8a12010-10-18 21:44:33 +0000534 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 mi = get_module_info(self, fullname);
538 if (mi == MI_ERROR)
Victor Stinner965a8a12010-10-18 21:44:33 +0000539 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400541 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000542 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 }
544 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000545}
546
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200547
Just van Rossum52e14d62002-12-30 22:08:05 +0000548static PyObject *
549zipimporter_get_data(PyObject *obj, PyObject *args)
550{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 ZipImporter *self = (ZipImporter *)obj;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100552 PyObject *path, *key;
Just van Rossum52e14d62002-12-30 22:08:05 +0000553#ifdef ALTSEP
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100554 _Py_IDENTIFIER(replace);
Just van Rossum52e14d62002-12-30 22:08:05 +0000555#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 PyObject *toc_entry;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100557 Py_ssize_t path_start, path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000558
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100559 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &path))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000561
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200562#ifdef ALTSEP
Martin v. Löwiscfa61292011-10-31 09:01:22 +0100563 path = _PyObject_CallMethodId(path, &PyId_replace, "CC", ALTSEP, SEP);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100564 if (!path)
565 return NULL;
566#else
567 Py_INCREF(path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000568#endif
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100569 if (PyUnicode_READY(path) == -1)
570 goto error;
571
572 path_len = PyUnicode_GET_LENGTH(path);
573
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200574 len = PyUnicode_GET_LENGTH(self->archive);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100575 path_start = 0;
576 if (PyUnicode_Tailmatch(path, self->archive, 0, len, -1)
577 && PyUnicode_READ_CHAR(path, len) == SEP) {
578 path_start = len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000580
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100581 key = PyUnicode_Substring(path, path_start, path_len);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000582 if (key == NULL)
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100583 goto error;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000584 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000586 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
587 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100588 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000590 Py_DECREF(key);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100591 Py_DECREF(path);
Victor Stinner60fe8d92010-08-16 23:48:11 +0000592 return get_data(self->archive, toc_entry);
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100593 error:
594 Py_DECREF(path);
595 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000596}
597
598static PyObject *
599zipimporter_get_code(PyObject *obj, PyObject *args)
600{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 ZipImporter *self = (ZipImporter *)obj;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400602 PyObject *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000603
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400604 if (!PyArg_ParseTuple(args, "U:zipimporter.get_code", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000608}
609
610static PyObject *
611zipimporter_get_source(PyObject *obj, PyObject *args)
612{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000613 ZipImporter *self = (ZipImporter *)obj;
614 PyObject *toc_entry;
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400615 PyObject *fullname, *subname, *path, *fullpath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000617
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400618 if (!PyArg_ParseTuple(args, "U:zipimporter.get_source", &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000619 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 mi = get_module_info(self, fullname);
Victor Stinner965a8a12010-10-18 21:44:33 +0000622 if (mi == MI_ERROR)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 return NULL;
Victor Stinner04106562010-10-18 20:44:08 +0000624 if (mi == MI_NOT_FOUND) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400625 PyErr_Format(ZipImportError, "can't find module %R", fullname);
Victor Stinner04106562010-10-18 20:44:08 +0000626 return NULL;
627 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400628
Victor Stinner965a8a12010-10-18 21:44:33 +0000629 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400630 if (subname == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000631 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000632
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400633 path = make_filename(self->prefix, subname);
634 Py_DECREF(subname);
635 if (path == NULL)
636 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000637
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400638 if (mi == MI_PACKAGE)
639 fullpath = PyUnicode_FromFormat("%U%c__init__.py", path, SEP);
640 else
641 fullpath = PyUnicode_FromFormat("%U.py", path);
642 Py_DECREF(path);
643 if (fullpath == NULL)
644 return NULL;
645
646 toc_entry = PyDict_GetItem(self->files, fullpath);
647 Py_DECREF(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000649 PyObject *res, *bytes;
650 bytes = get_data(self->archive, toc_entry);
651 if (bytes == NULL)
652 return NULL;
653 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
654 PyBytes_GET_SIZE(bytes));
655 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 return res;
657 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 /* we have the module, but no source */
660 Py_INCREF(Py_None);
661 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000662}
663
664PyDoc_STRVAR(doc_find_module,
665"find_module(fullname, path=None) -> self or None.\n\
666\n\
667Search for a module specified by 'fullname'. 'fullname' must be the\n\
668fully qualified (dotted) module name. It returns the zipimporter\n\
669instance itself if the module was found, or None if it wasn't.\n\
670The optional 'path' argument is ignored -- it's there for compatibility\n\
671with the importer protocol.");
672
Eric V. Smith984b11f2012-05-24 20:21:04 -0400673PyDoc_STRVAR(doc_find_loader,
674"find_loader(fullname, path=None) -> self, str or None.\n\
675\n\
676Search for a module specified by 'fullname'. 'fullname' must be the\n\
677fully qualified (dotted) module name. It returns the zipimporter\n\
678instance itself if the module was found, a string containing the\n\
679full path name if it's possibly a portion of a namespace package,\n\
680or None otherwise. The optional 'path' argument is ignored -- it's\n\
681 there for compatibility with the importer protocol.");
682
Just van Rossum52e14d62002-12-30 22:08:05 +0000683PyDoc_STRVAR(doc_load_module,
684"load_module(fullname) -> module.\n\
685\n\
686Load the module specified by 'fullname'. 'fullname' must be the\n\
687fully qualified (dotted) module name. It returns the imported\n\
688module, or raises ZipImportError if it wasn't found.");
689
690PyDoc_STRVAR(doc_get_data,
691"get_data(pathname) -> string with file data.\n\
692\n\
693Return the data associated with 'pathname'. Raise IOError if\n\
694the file wasn't found.");
695
696PyDoc_STRVAR(doc_is_package,
697"is_package(fullname) -> bool.\n\
698\n\
699Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000700Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000701
702PyDoc_STRVAR(doc_get_code,
703"get_code(fullname) -> code object.\n\
704\n\
705Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000706if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000707
708PyDoc_STRVAR(doc_get_source,
709"get_source(fullname) -> source string.\n\
710\n\
711Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000712if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000713contain the module, but has no source for it.");
714
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000715
716PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000717"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000718\n\
719Return the filename for the specified module.");
720
Just van Rossum52e14d62002-12-30 22:08:05 +0000721static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 {"find_module", zipimporter_find_module, METH_VARARGS,
723 doc_find_module},
Eric V. Smith984b11f2012-05-24 20:21:04 -0400724 {"find_loader", zipimporter_find_loader, METH_VARARGS,
725 doc_find_loader},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 {"load_module", zipimporter_load_module, METH_VARARGS,
727 doc_load_module},
728 {"get_data", zipimporter_get_data, METH_VARARGS,
729 doc_get_data},
730 {"get_code", zipimporter_get_code, METH_VARARGS,
731 doc_get_code},
732 {"get_source", zipimporter_get_source, METH_VARARGS,
733 doc_get_source},
734 {"get_filename", zipimporter_get_filename, METH_VARARGS,
735 doc_get_filename},
736 {"is_package", zipimporter_is_package, METH_VARARGS,
737 doc_is_package},
738 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000739};
740
741static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
743 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
744 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
745 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000746};
747
748PyDoc_STRVAR(zipimporter_doc,
749"zipimporter(archivepath) -> zipimporter object\n\
750\n\
751Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000752a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
753'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
754valid directory inside the archive.\n\
755\n\
756'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
757archive.\n\
758\n\
759The 'archive' attribute of zipimporter objects contains the name of the\n\
760zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000761
762#define DEFERRED_ADDRESS(ADDR) 0
763
764static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
766 "zipimport.zipimporter",
767 sizeof(ZipImporter),
768 0, /* tp_itemsize */
769 (destructor)zipimporter_dealloc, /* tp_dealloc */
770 0, /* tp_print */
771 0, /* tp_getattr */
772 0, /* tp_setattr */
773 0, /* tp_reserved */
774 (reprfunc)zipimporter_repr, /* tp_repr */
775 0, /* tp_as_number */
776 0, /* tp_as_sequence */
777 0, /* tp_as_mapping */
778 0, /* tp_hash */
779 0, /* tp_call */
780 0, /* tp_str */
781 PyObject_GenericGetAttr, /* tp_getattro */
782 0, /* tp_setattro */
783 0, /* tp_as_buffer */
784 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
785 Py_TPFLAGS_HAVE_GC, /* tp_flags */
786 zipimporter_doc, /* tp_doc */
787 zipimporter_traverse, /* tp_traverse */
788 0, /* tp_clear */
789 0, /* tp_richcompare */
790 0, /* tp_weaklistoffset */
791 0, /* tp_iter */
792 0, /* tp_iternext */
793 zipimporter_methods, /* tp_methods */
794 zipimporter_members, /* tp_members */
795 0, /* tp_getset */
796 0, /* tp_base */
797 0, /* tp_dict */
798 0, /* tp_descr_get */
799 0, /* tp_descr_set */
800 0, /* tp_dictoffset */
801 (initproc)zipimporter_init, /* tp_init */
802 PyType_GenericAlloc, /* tp_alloc */
803 PyType_GenericNew, /* tp_new */
804 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000805};
806
807
808/* implementation */
809
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  The 4 bytes are interpreted as a
   signed (two's complement) 32-bit value, so a set top bit yields a
   negative result whatever the width of long is.  This partially
   reimplements marshal.c:r_long().

   buf must point to at least 4 readable bytes. */
static long
get_long(unsigned char *buf) {
    unsigned long x;

    /* Assemble in unsigned arithmetic: shifting a byte >= 0x80 left by 24
       in a signed 32-bit long would overflow into the sign bit. */
    x = (unsigned long)buf[0];
    x |= (unsigned long)buf[1] << 8;
    x |= (unsigned long)buf[2] << 16;
    x |= (unsigned long)buf[3] << 24;

    if (x & 0x80000000UL) {
        /* Sign extension without relying on the width of long or on an
           out-of-range unsigned -> signed conversion:
           value = -(2**32 - x) = -((~x & 0x7FFFFFFF) + 1). */
        return -(long)(~x & 0x7FFFFFFFUL) - 1;
    }
    return (long)x;
}
826
827/*
828 read_directory(archive) -> files dict (new reference)
829
830 Given a path to a Zip archive, build a dict, mapping file names
831 (local to the archive, using SEP as a separator) to toc entries.
832
833 A toc_entry is a tuple:
834
Victor Stinner08654e12010-10-18 12:09:02 +0000835 (__file__, # value to use for __file__, available for all files,
836 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 compress, # compression kind; 0 for uncompressed
838 data_size, # size of compressed data on disk
839 file_size, # size of decompressed data
840 file_offset, # offset of file header from start of archive
841 time, # mod time of file (in dos format)
842 date, # mod data of file (in dos format)
843 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000844 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000845
846 Directories can be recognized by the trailing SEP in the name,
847 data_size and file_offset are 0.
848*/
849static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400850read_directory(PyObject *archive)
Just van Rossum52e14d62002-12-30 22:08:05 +0000851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 PyObject *files = NULL;
853 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000854 unsigned short flags;
Gregory P. Smithab320662012-01-30 15:17:33 -0800855 short compress, time, date, name_size;
856 long crc, data_size, file_size, header_size;
857 Py_ssize_t file_offset, header_position, header_offset;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200858 long l, count;
859 Py_ssize_t i;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 char name[MAXPATHLEN + 5];
Victor Stinner2460a432010-08-16 17:54:28 +0000861 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 char *p, endof_central_dir[22];
Gregory P. Smithab320662012-01-30 15:17:33 -0800863 Py_ssize_t arc_offset; /* Absolute offset to start of the zip-archive. */
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100864 PyObject *path;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000865 const char *charset;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000866 int bootstrap;
Just van Rossum52e14d62002-12-30 22:08:05 +0000867
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400868 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 if (fp == NULL) {
Victor Stinnerbd206e22011-12-18 21:04:17 +0100870 if (!PyErr_Occurred())
Victor Stinner35734762011-12-18 21:05:22 +0100871 PyErr_Format(ZipImportError, "can't open Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 return NULL;
873 }
874 fseek(fp, -22, SEEK_END);
875 header_position = ftell(fp);
876 if (fread(endof_central_dir, 1, 22, fp) != 22) {
877 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400878 PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 return NULL;
880 }
881 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
882 /* Bad: End of Central Dir signature */
883 fclose(fp);
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400884 PyErr_Format(ZipImportError, "not a Zip file: %R", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 return NULL;
886 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 header_size = get_long((unsigned char *)endof_central_dir + 12);
889 header_offset = get_long((unsigned char *)endof_central_dir + 16);
890 arc_offset = header_position - header_offset - header_size;
891 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 files = PyDict_New();
894 if (files == NULL)
895 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 /* Start of Central Directory */
898 count = 0;
899 for (;;) {
900 PyObject *t;
901 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 fseek(fp, header_offset, 0); /* Start of file header */
904 l = PyMarshal_ReadLongFromFile(fp);
905 if (l != 0x02014B50)
906 break; /* Bad: Central Dir File Header */
Victor Stinnerd36c8212010-10-18 12:13:46 +0000907 fseek(fp, header_offset + 8, 0);
908 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 compress = PyMarshal_ReadShortFromFile(fp);
910 time = PyMarshal_ReadShortFromFile(fp);
911 date = PyMarshal_ReadShortFromFile(fp);
912 crc = PyMarshal_ReadLongFromFile(fp);
913 data_size = PyMarshal_ReadLongFromFile(fp);
914 file_size = PyMarshal_ReadLongFromFile(fp);
915 name_size = PyMarshal_ReadShortFromFile(fp);
916 header_size = 46 + name_size +
917 PyMarshal_ReadShortFromFile(fp) +
918 PyMarshal_ReadShortFromFile(fp);
919 fseek(fp, header_offset + 42, 0);
920 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
921 if (name_size > MAXPATHLEN)
922 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 p = name;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200925 for (i = 0; i < (Py_ssize_t)name_size; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 *p = (char)getc(fp);
927 if (*p == '/')
928 *p = SEP;
929 p++;
930 }
931 *p = 0; /* Add terminating null byte */
932 header_offset += header_size;
Just van Rossum52e14d62002-12-30 22:08:05 +0000933
Victor Stinner4ee65a92011-01-22 10:30:29 +0000934 bootstrap = 0;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000935 if (flags & 0x0800)
936 charset = "utf-8";
Victor Stinner4ee65a92011-01-22 10:30:29 +0000937 else if (!PyThreadState_GET()->interp->codecs_initialized) {
938 /* During bootstrap, we may need to load the encodings
939 package from a ZIP file. But the cp437 encoding is implemented
940 in Python in the encodings package.
941
942 Break out of this dependency by assuming that the path to
943 the encodings module is ASCII-only. */
944 charset = "ascii";
945 bootstrap = 1;
946 }
Victor Stinnerd36c8212010-10-18 12:13:46 +0000947 else
948 charset = "cp437";
949 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner4ee65a92011-01-22 10:30:29 +0000950 if (nameobj == NULL) {
951 if (bootstrap)
952 PyErr_Format(PyExc_NotImplementedError,
953 "bootstrap issue: python%i%i.zip contains non-ASCII "
954 "filenames without the unicode flag",
955 PY_MAJOR_VERSION, PY_MINOR_VERSION);
Victor Stinner2460a432010-08-16 17:54:28 +0000956 goto error;
Victor Stinner4ee65a92011-01-22 10:30:29 +0000957 }
Stefan Krah000fde92012-08-20 14:14:49 +0200958 if (PyUnicode_READY(nameobj) == -1)
959 goto error;
Martin v. Löwisa72e78b2011-10-31 08:33:37 +0100960 path = PyUnicode_FromFormat("%U%c%U", archive, SEP, nameobj);
961 if (path == NULL)
Victor Stinner2460a432010-08-16 17:54:28 +0000962 goto error;
Gregory P. Smithcc6abd52012-01-30 15:55:29 -0800963 t = Py_BuildValue("Nhllnhhl", path, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 file_size, file_offset, time, date, crc);
965 if (t == NULL)
966 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000967 err = PyDict_SetItem(files, nameobj, t);
968 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 Py_DECREF(t);
970 if (err != 0)
971 goto error;
972 count++;
973 }
974 fclose(fp);
975 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -0400976 PySys_FormatStderr("# zipimport: found %ld names in %R\n",
977 count, archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 return files;
Just van Rossum52e14d62002-12-30 22:08:05 +0000979error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 fclose(fp);
981 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +0000982 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000984}
985
986/* Return the zlib.decompress function object, or NULL if zlib couldn't
987 be imported. The function is cached when found, so subsequent calls
Victor Stinner4925cde2011-05-20 00:16:09 +0200988 don't import zlib again. */
Just van Rossum52e14d62002-12-30 22:08:05 +0000989static PyObject *
990get_decompress_func(void)
991{
Victor Stinner4925cde2011-05-20 00:16:09 +0200992 static int importing_zlib = 0;
993 PyObject *zlib;
994 PyObject *decompress;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200995 _Py_IDENTIFIER(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +0000996
Victor Stinner4925cde2011-05-20 00:16:09 +0200997 if (importing_zlib != 0)
998 /* Someone has a zlib.py[co] in their Zip file;
999 let's avoid a stack overflow. */
1000 return NULL;
1001 importing_zlib = 1;
1002 zlib = PyImport_ImportModuleNoBlock("zlib");
1003 importing_zlib = 0;
1004 if (zlib != NULL) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001005 decompress = _PyObject_GetAttrId(zlib,
1006 &PyId_decompress);
Victor Stinner4925cde2011-05-20 00:16:09 +02001007 Py_DECREF(zlib);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 }
Victor Stinner4925cde2011-05-20 00:16:09 +02001009 else {
1010 PyErr_Clear();
1011 decompress = NULL;
1012 }
1013 if (Py_VerboseFlag)
1014 PySys_WriteStderr("# zipimport: zlib %s\n",
1015 zlib != NULL ? "available": "UNAVAILABLE");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +00001017}
1018
1019/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
1020 data as a new reference. */
1021static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +00001022get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 PyObject *raw_data, *data = NULL, *decompress;
1025 char *buf;
1026 FILE *fp;
1027 int err;
1028 Py_ssize_t bytes_read = 0;
1029 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +00001030 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 long compress, data_size, file_size, file_offset, bytes_size;
1032 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +00001033
Victor Stinner60fe8d92010-08-16 23:48:11 +00001034 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 &data_size, &file_size, &file_offset, &time,
1036 &date, &crc)) {
1037 return NULL;
1038 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001039
Victor Stinner60fe8d92010-08-16 23:48:11 +00001040 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 if (!fp) {
Victor Stinnerbd206e22011-12-18 21:04:17 +01001042 if (!PyErr_Occurred())
1043 PyErr_Format(PyExc_IOError,
1044 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 return NULL;
1046 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 /* Check to make sure the local file header is correct */
1049 fseek(fp, file_offset, 0);
1050 l = PyMarshal_ReadLongFromFile(fp);
1051 if (l != 0x04034B50) {
1052 /* Bad: Local File Header */
1053 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +00001054 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 archive);
1056 fclose(fp);
1057 return NULL;
1058 }
1059 fseek(fp, file_offset + 26, 0);
1060 l = 30 + PyMarshal_ReadShortFromFile(fp) +
1061 PyMarshal_ReadShortFromFile(fp); /* local header size */
1062 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 bytes_size = compress == 0 ? data_size : data_size + 1;
1065 if (bytes_size == 0)
1066 bytes_size++;
1067 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +00001068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 if (raw_data == NULL) {
1070 fclose(fp);
1071 return NULL;
1072 }
1073 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 err = fseek(fp, file_offset, 0);
1076 if (err == 0)
1077 bytes_read = fread(buf, 1, data_size, fp);
1078 fclose(fp);
1079 if (err || bytes_read != data_size) {
1080 PyErr_SetString(PyExc_IOError,
1081 "zipimport: can't read data");
1082 Py_DECREF(raw_data);
1083 return NULL;
1084 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 if (compress != 0) {
1087 buf[data_size] = 'Z'; /* saw this in zipfile.py */
1088 data_size++;
1089 }
1090 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (compress == 0) { /* data is not compressed */
1093 data = PyBytes_FromStringAndSize(buf, data_size);
1094 Py_DECREF(raw_data);
1095 return data;
1096 }
1097
1098 /* Decompress with zlib */
1099 decompress = get_decompress_func();
1100 if (decompress == NULL) {
1101 PyErr_SetString(ZipImportError,
1102 "can't decompress data; "
1103 "zlib not available");
1104 goto error;
1105 }
1106 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Victor Stinner4925cde2011-05-20 00:16:09 +02001107 Py_DECREF(decompress);
Just van Rossum52e14d62002-12-30 22:08:05 +00001108error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 Py_DECREF(raw_data);
1110 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +00001111}
1112
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 if t1 and t2 differ by at most one second, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* dostime only stores even seconds, so be lenient.
       Always subtract the smaller value from the larger one: the difference
       is then never negative, so nothing has to be negated and the test
       also holds when time_t is an unsigned type. */
    if (t1 >= t2)
        return t1 - t2 <= 1;
    return t2 - t1 <= 1;
}
1125
1126/* Given the contents of a .py[co] file in a buffer, unmarshal the data
1127 and return the code object. Return None if it the magic word doesn't
1128 match (we do this instead of raising an exception as we fall back
1129 to .py if available and we don't want to mask other errors).
1130 Returns a new reference. */
1131static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001132unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
Just van Rossum52e14d62002-12-30 22:08:05 +00001133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 PyObject *code;
1135 char *buf = PyBytes_AsString(data);
1136 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +00001137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (size <= 9) {
1139 PyErr_SetString(ZipImportError,
1140 "bad pyc data");
1141 return NULL;
1142 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1145 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001146 PySys_FormatStderr("# %R has bad magic\n",
1147 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 Py_INCREF(Py_None);
1149 return Py_None; /* signal caller to try alternative */
1150 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1153 mtime)) {
1154 if (Py_VerboseFlag)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001155 PySys_FormatStderr("# %R has bad mtime\n",
1156 pathname);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 Py_INCREF(Py_None);
1158 return Py_None; /* signal caller to try alternative */
1159 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001160
Antoine Pitrou5136ac02012-01-13 18:52:16 +01001161 /* XXX the pyc's size field is ignored; timestamp collisions are probably
1162 unimportant with zip files. */
1163 code = PyMarshal_ReadObjectFromString(buf + 12, size - 12);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 if (code == NULL)
1165 return NULL;
1166 if (!PyCode_Check(code)) {
1167 Py_DECREF(code);
1168 PyErr_Format(PyExc_TypeError,
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001169 "compiled module %R is not a code object",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 pathname);
1171 return NULL;
1172 }
1173 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001174}
1175
1176/* Replace any occurances of "\r\n?" in the input string with "\n".
1177 This converts DOS and Mac line endings to Unix line endings.
1178 Also append a trailing "\n" to be compatible with
1179 PyParser_SimpleParseFile(). Returns a new reference. */
1180static PyObject *
1181normalize_line_endings(PyObject *source)
1182{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001183 char *buf, *q, *p;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 PyObject *fixed_source;
1185 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001186
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001187 p = PyBytes_AsString(source);
1188 if (p == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 return PyBytes_FromStringAndSize("\n\0", 2);
1190 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* one char extra for trailing \n and one for terminating \0 */
1193 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1194 if (buf == NULL) {
1195 PyErr_SetString(PyExc_MemoryError,
1196 "zipimport: no memory to allocate "
1197 "source buffer");
1198 return NULL;
1199 }
1200 /* replace "\r\n?" by "\n" */
1201 for (q = buf; *p != '\0'; p++) {
1202 if (*p == '\r') {
1203 *q++ = '\n';
1204 if (*(p + 1) == '\n')
1205 p++;
1206 }
1207 else
1208 *q++ = *p;
1209 len++;
1210 }
1211 *q++ = '\n'; /* add trailing \n */
1212 *q = '\0';
1213 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1214 PyMem_Free(buf);
1215 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001216}
1217
1218/* Given a string buffer containing Python source code, compile it
1219 return and return a code object as a new reference. */
1220static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001221compile_source(PyObject *pathname, PyObject *source)
Just van Rossum52e14d62002-12-30 22:08:05 +00001222{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001223 PyObject *code, *fixed_source, *pathbytes;
Just van Rossum52e14d62002-12-30 22:08:05 +00001224
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001225 pathbytes = PyUnicode_EncodeFSDefault(pathname);
1226 if (pathbytes == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001228
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001229 fixed_source = normalize_line_endings(source);
1230 if (fixed_source == NULL) {
1231 Py_DECREF(pathbytes);
1232 return NULL;
1233 }
1234
1235 code = Py_CompileString(PyBytes_AsString(fixed_source),
1236 PyBytes_AsString(pathbytes),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 Py_file_input);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001238 Py_DECREF(pathbytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 Py_DECREF(fixed_source);
1240 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001241}
1242
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   Only the low 16 bits of 'dostime' and 'dosdate' are used, so values that
   were sign-extended from 16-bit fields decode like their unsigned
   counterparts.  The broken-down time is handed to mktime(), whose result
   (local time, DST determined by the C library) is returned. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* Decode from unsigned copies: right-shifting a negative int is
       implementation-defined. */
    const unsigned int t = (unsigned int)dostime & 0xFFFFU;
    const unsigned int d = (unsigned int)dosdate & 0xFFFFU;
    struct tm stm;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_sec   =  (int)(t & 0x1f) * 2;          /* stored in 2 s units */
    stm.tm_min   =  (int)((t >> 5) & 0x3f);
    stm.tm_hour  =  (int)((t >> 11) & 0x1f);
    stm.tm_mday  =  (int)(d & 0x1f);
    stm.tm_mon   =  (int)((d >> 5) & 0x0f) - 1;   /* tm_mon is 0-based */
    stm.tm_year  =  (int)((d >> 9) & 0x7f) + 80;  /* years since 1900 */
    stm.tm_isdst =  -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1262
1263/* Given a path to a .pyc or .pyo file in the archive, return the
Ezio Melotti13925002011-03-16 11:05:33 +02001264 modification time of the matching .py file, or 0 if no source
Just van Rossum52e14d62002-12-30 22:08:05 +00001265 is available. */
1266static time_t
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001267get_mtime_of_source(ZipImporter *self, PyObject *path)
Just van Rossum52e14d62002-12-30 22:08:05 +00001268{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001269 PyObject *toc_entry, *stripped;
1270 time_t mtime;
1271
1272 /* strip 'c' or 'o' from *.py[co] */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273 if (PyUnicode_READY(path) == -1)
1274 return (time_t)-1;
1275 stripped = PyUnicode_FromKindAndData(PyUnicode_KIND(path),
1276 PyUnicode_DATA(path),
1277 PyUnicode_GET_LENGTH(path) - 1);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001278 if (stripped == NULL)
1279 return (time_t)-1;
1280
1281 toc_entry = PyDict_GetItem(self->files, stripped);
1282 Py_DECREF(stripped);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1284 PyTuple_Size(toc_entry) == 8) {
1285 /* fetch the time stamp of the .py file for comparison
1286 with an embedded pyc time stamp */
1287 int time, date;
1288 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1289 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1290 mtime = parse_dostime(time, date);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001291 } else
1292 mtime = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001294}
1295
1296/* Return the code object for the module named by 'fullname' from the
1297 Zip archive as a new reference. */
1298static PyObject *
1299get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001301{
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001302 PyObject *data, *modpath, *code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001303
Victor Stinner60fe8d92010-08-16 23:48:11 +00001304 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 if (data == NULL)
1306 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001307
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001308 modpath = PyTuple_GetItem(toc_entry, 0);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001309 if (isbytecode)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001310 code = unmarshal_code(modpath, data, mtime);
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001311 else
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001312 code = compile_source(modpath, data);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 Py_DECREF(data);
1314 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001315}
1316
Ezio Melotti42da6632011-03-15 05:18:48 +02001317/* Get the code object associated with the module specified by
Just van Rossum52e14d62002-12-30 22:08:05 +00001318 'fullname'. */
1319static PyObject *
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001320get_module_code(ZipImporter *self, PyObject *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001321 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001322{
Gregory P. Smith95c7c462011-05-21 05:19:42 -07001323 PyObject *code = NULL, *toc_entry, *subname;
Victor Stinner9a2261a2011-05-26 13:59:41 +02001324 PyObject *path, *fullpath = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 subname = get_subname(fullname);
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001328 if (subname == NULL)
1329 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001330
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001331 path = make_filename(self->prefix, subname);
1332 Py_DECREF(subname);
1333 if (path == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 for (zso = zip_searchorder; *zso->suffix; zso++) {
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001337 code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001338
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001339 fullpath = PyUnicode_FromFormat("%U%s", path, zso->suffix);
1340 if (fullpath == NULL)
1341 goto exit;
1342
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 if (Py_VerboseFlag > 1)
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001344 PySys_FormatStderr("# trying %U%c%U\n",
1345 self->archive, (int)SEP, fullpath);
1346 toc_entry = PyDict_GetItem(self->files, fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 if (toc_entry != NULL) {
1348 time_t mtime = 0;
1349 int ispackage = zso->type & IS_PACKAGE;
1350 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001351
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001352 if (isbytecode) {
1353 mtime = get_mtime_of_source(self, fullpath);
1354 if (mtime == (time_t)-1 && PyErr_Occurred()) {
1355 goto exit;
1356 }
1357 }
1358 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 if (p_ispackage != NULL)
1360 *p_ispackage = ispackage;
1361 code = get_code_from_data(self, ispackage,
1362 isbytecode, mtime,
1363 toc_entry);
1364 if (code == Py_None) {
1365 /* bad magic number or non-matching mtime
1366 in byte code, try next */
1367 Py_DECREF(code);
1368 continue;
1369 }
Victor Stinner08654e12010-10-18 12:09:02 +00001370 if (code != NULL && p_modpath != NULL) {
1371 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1372 Py_INCREF(*p_modpath);
1373 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001374 goto exit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001376 else
1377 Py_CLEAR(fullpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 }
Victor Stinnerf6b563a2011-03-14 20:46:50 -04001379 PyErr_Format(ZipImportError, "can't find module %R", fullname);
1380exit:
1381 Py_DECREF(path);
1382 Py_XDECREF(fullpath);
1383 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001384}
1385
1386
1387/* Module init */
1388
1389PyDoc_STRVAR(zipimport_doc,
1390"zipimport provides support for importing Python modules from Zip archives.\n\
1391\n\
1392This module exports three objects:\n\
1393- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001394- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001395 subclass of ImportError, so it can be caught as ImportError, too.\n\
1396- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1397 info dicts, as used in zipimporter._files.\n\
1398\n\
1399It is usually not needed to use the zipimport module explicitly; it is\n\
1400used by the builtin import mechanism for sys.path items that are paths\n\
1401to Zip archives.");
1402
Martin v. Löwis1a214512008-06-11 05:26:20 +00001403static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 PyModuleDef_HEAD_INIT,
1405 "zipimport",
1406 zipimport_doc,
1407 -1,
1408 NULL,
1409 NULL,
1410 NULL,
1411 NULL,
1412 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001413};
1414
Just van Rossum52e14d62002-12-30 22:08:05 +00001415PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001416PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (PyType_Ready(&ZipImporter_Type) < 0)
1421 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 /* Correct directory separator */
1424 zip_searchorder[0].suffix[0] = SEP;
1425 zip_searchorder[1].suffix[0] = SEP;
1426 zip_searchorder[2].suffix[0] = SEP;
1427 if (Py_OptimizeFlag) {
1428 /* Reverse *.pyc and *.pyo */
1429 struct st_zip_searchorder tmp;
1430 tmp = zip_searchorder[0];
1431 zip_searchorder[0] = zip_searchorder[1];
1432 zip_searchorder[1] = tmp;
1433 tmp = zip_searchorder[3];
1434 zip_searchorder[3] = zip_searchorder[4];
1435 zip_searchorder[4] = tmp;
1436 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 mod = PyModule_Create(&zipimportmodule);
1439 if (mod == NULL)
1440 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1443 PyExc_ImportError, NULL);
1444 if (ZipImportError == NULL)
1445 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 Py_INCREF(ZipImportError);
1448 if (PyModule_AddObject(mod, "ZipImportError",
1449 ZipImportError) < 0)
1450 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 Py_INCREF(&ZipImporter_Type);
1453 if (PyModule_AddObject(mod, "zipimporter",
1454 (PyObject *)&ZipImporter_Type) < 0)
1455 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 zip_directory_cache = PyDict_New();
1458 if (zip_directory_cache == NULL)
1459 return NULL;
1460 Py_INCREF(zip_directory_cache);
1461 if (PyModule_AddObject(mod, "_zip_directory_cache",
1462 zip_directory_cache) < 0)
1463 return NULL;
1464 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001465}