blob: d58d4fecc062f451bdeb7241fd2399d91fcb39d1 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type; they may be OR-ed together. */
#define IS_SOURCE   0x0     /* no bit set: entry is source text */
#define IS_BYTECODE 0x1     /* entry is compiled bytecode */
#define IS_PACKAGE  0x2     /* entry is a package __init__ file */
11
/* One entry of the module search table: a file name suffix and the
   IS_* flags describing what kind of entry that suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* e.g. "/__init__.pyc"; "" marks the table end */
    int type;           /* combination of IS_SOURCE/IS_BYTECODE/IS_PACKAGE */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000030};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
Victor Stinner72f767e2010-10-18 11:44:21 +000039 PyObject *prefix; /* file prefix: "a/sub/directory/",
40 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000042};
43
Just van Rossum52e14d62002-12-30 22:08:05 +000044static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000045/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000046static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000049static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000050static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +000051static PyObject *get_module_code(ZipImporter *self, char *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +000052 int *p_ispackage, PyObject **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000053
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
Victor Stinner2460a432010-08-16 17:54:28 +000065 PyObject *pathobj, *files;
Victor Stinner2b8dab72010-08-14 14:54:10 +000066 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
67 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 if (!_PyArg_NoKeywords("zipimporter()", kwds))
70 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000071
Victor Stinner2b8dab72010-08-14 14:54:10 +000072 if (!PyArg_ParseTuple(args, "O&:zipimporter",
73 PyUnicode_FSDecoder, &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000075
Victor Stinner2b8dab72010-08-14 14:54:10 +000076 /* copy path to buf */
77 len = PyUnicode_GET_SIZE(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (len == 0) {
79 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000080 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 }
82 if (len >= MAXPATHLEN) {
83 PyErr_SetString(ZipImportError,
84 "archive path too long");
Victor Stinner2b8dab72010-08-14 14:54:10 +000085 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 }
Victor Stinner2b8dab72010-08-14 14:54:10 +000087 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
Just van Rossum52e14d62002-12-30 22:08:05 +000088
89#ifdef ALTSEP
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 for (p = buf; *p; p++) {
91 if (*p == ALTSEP)
92 *p = SEP;
93 }
Just van Rossum52e14d62002-12-30 22:08:05 +000094#endif
95
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 path = NULL;
97 prefix = NULL;
98 for (;;) {
99 struct stat statbuf;
100 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000101
Victor Stinner2b8dab72010-08-14 14:54:10 +0000102 if (pathobj == NULL) {
103 pathobj = PyUnicode_FromUnicode(buf, len);
104 if (pathobj == NULL)
105 goto error;
106 }
107 rv = _Py_stat(pathobj, &statbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 if (rv == 0) {
109 /* it exists */
110 if (S_ISREG(statbuf.st_mode))
111 /* it's a file */
112 path = buf;
113 break;
114 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000115 else if (PyErr_Occurred())
116 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 /* back up one path element */
Victor Stinner2b8dab72010-08-14 14:54:10 +0000118 p = Py_UNICODE_strrchr(buf, SEP);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000124 len = p - buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 prefix = p;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 Py_CLEAR(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000128 if (path == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000130 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000132
Victor Stinner2b8dab72010-08-14 14:54:10 +0000133 files = PyDict_GetItem(zip_directory_cache, pathobj);
134 if (files == NULL) {
Victor Stinner2460a432010-08-16 17:54:28 +0000135 files = read_directory(pathobj);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000136 if (files == NULL)
137 goto error;
138 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
139 goto error;
140 }
141 else
142 Py_INCREF(files);
143 self->files = files;
144
145 self->archive = pathobj;
146 pathobj = NULL;
147
148 if (prefix != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 prefix++;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000150 len = Py_UNICODE_strlen(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000155 len++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 }
157 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000158 else
159 len = 0;
160 self->prefix = PyUnicode_FromUnicode(prefix, len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 if (self->prefix == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000162 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000165
166error:
167 Py_XDECREF(pathobj);
168 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000169}
170
171/* GC support. */
172static int
173zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 ZipImporter *self = (ZipImporter *)obj;
176 Py_VISIT(self->files);
177 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000178}
179
180static void
181zipimporter_dealloc(ZipImporter *self)
182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 PyObject_GC_UnTrack(self);
184 Py_XDECREF(self->archive);
185 Py_XDECREF(self->prefix);
186 Py_XDECREF(self->files);
187 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000188}
189
190static PyObject *
191zipimporter_repr(ZipImporter *self)
192{
Victor Stinner028dd972010-08-17 00:04:48 +0000193 if (self->archive == NULL)
194 return PyUnicode_FromString("<zipimporter object \"???\">");
195 else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
196 return PyUnicode_FromFormat("<zipimporter object \"%.300U%c%.150U\">",
197 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 else
Victor Stinner028dd972010-08-17 00:04:48 +0000199 return PyUnicode_FromFormat("<zipimporter object \"%.300U\">",
200 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000201}
202
/* Return the last component of a dotted module name, i.e.
   fullname.split(".")[-1]. The result points into 'fullname' itself;
   nothing is copied. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
214
215/* Given a (sub)modulename, write the potential file path in the
216 archive (without extension) to the path buffer. Return the
217 length of the resulting string. */
218static int
Victor Stinner269aeb72010-10-18 20:40:59 +0000219make_filename(PyObject *prefix_obj, char *name, char *path, size_t pathsize)
Just van Rossum52e14d62002-12-30 22:08:05 +0000220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 size_t len;
222 char *p;
Victor Stinner72f767e2010-10-18 11:44:21 +0000223 PyObject *prefix;
Just van Rossum52e14d62002-12-30 22:08:05 +0000224
Victor Stinner72f767e2010-10-18 11:44:21 +0000225 prefix = PyUnicode_EncodeFSDefault(prefix_obj);
226 if (prefix == NULL)
227 return -1;
228 len = PyBytes_GET_SIZE(prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
Victor Stinner269aeb72010-10-18 20:40:59 +0000231 if (len + strlen(name) + 13 >= pathsize - 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000232 PyErr_SetString(ZipImportError, "path too long");
Victor Stinner72f767e2010-10-18 11:44:21 +0000233 Py_DECREF(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 return -1;
235 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000236
Victor Stinner72f767e2010-10-18 11:44:21 +0000237 strcpy(path, PyBytes_AS_STRING(prefix));
238 Py_DECREF(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 strcpy(path + len, name);
240 for (p = path + len; *p; p++) {
241 if (*p == '.')
242 *p = SEP;
243 }
244 len += strlen(name);
245 assert(len < INT_MAX);
246 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000247}
248
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (an __init__ entry) */
};
255
256/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000257static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000258get_module_info(ZipImporter *self, char *fullname)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 char *subname, path[MAXPATHLEN + 1];
261 int len;
262 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000265
Victor Stinner269aeb72010-10-18 20:40:59 +0000266 len = make_filename(self->prefix, subname, path, sizeof(path));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 if (len < 0)
268 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 for (zso = zip_searchorder; *zso->suffix; zso++) {
271 strcpy(path + len, zso->suffix);
272 if (PyDict_GetItemString(self->files, path) != NULL) {
273 if (zso->type & IS_PACKAGE)
274 return MI_PACKAGE;
275 else
276 return MI_MODULE;
277 }
278 }
279 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000280}
281
282/* Check whether we can satisfy the import of the module named by
283 'fullname'. Return self if we can, None if we can't. */
284static PyObject *
285zipimporter_find_module(PyObject *obj, PyObject *args)
286{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 ZipImporter *self = (ZipImporter *)obj;
288 PyObject *path = NULL;
289 char *fullname;
290 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293 &fullname, &path))
294 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 mi = get_module_info(self, fullname);
297 if (mi == MI_ERROR)
298 return NULL;
299 if (mi == MI_NOT_FOUND) {
300 Py_INCREF(Py_None);
301 return Py_None;
302 }
303 Py_INCREF(self);
304 return (PyObject *)self;
Just van Rossum52e14d62002-12-30 22:08:05 +0000305}
306
307/* Load and return the module named by 'fullname'. */
308static PyObject *
309zipimporter_load_module(PyObject *obj, PyObject *args)
310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner26fabe12010-10-18 12:03:25 +0000312 PyObject *code = NULL, *mod, *dict;
Victor Stinner08654e12010-10-18 12:09:02 +0000313 char *fullname;
314 PyObject *modpath = NULL, *modpath_bytes;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
318 &fullname))
319 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 code = get_module_code(self, fullname, &ispackage, &modpath);
322 if (code == NULL)
Victor Stinner26fabe12010-10-18 12:03:25 +0000323 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 mod = PyImport_AddModule(fullname);
Victor Stinner26fabe12010-10-18 12:03:25 +0000326 if (mod == NULL)
327 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 /* mod.__loader__ = self */
331 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
332 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 if (ispackage) {
335 /* add __path__ to the module *before* the code gets
336 executed */
337 PyObject *pkgpath, *fullpath;
338 char *subname = get_subname(fullname);
339 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 fullpath = PyUnicode_FromFormat("%U%c%U%s",
342 self->archive, SEP,
343 self->prefix, subname);
344 if (fullpath == NULL)
345 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 pkgpath = Py_BuildValue("[O]", fullpath);
348 Py_DECREF(fullpath);
349 if (pkgpath == NULL)
350 goto error;
351 err = PyDict_SetItemString(dict, "__path__", pkgpath);
352 Py_DECREF(pkgpath);
353 if (err != 0)
354 goto error;
355 }
Victor Stinner08654e12010-10-18 12:09:02 +0000356 modpath_bytes = PyUnicode_EncodeFSDefault(modpath);
357 if (modpath_bytes == NULL)
358 goto error;
359 mod = PyImport_ExecCodeModuleEx(fullname, code,
360 PyBytes_AS_STRING(modpath_bytes));
361 Py_DECREF(modpath_bytes);
Victor Stinner26fabe12010-10-18 12:03:25 +0000362 Py_CLEAR(code);
363 if (mod == NULL)
364 goto error;
365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 if (Py_VerboseFlag)
Victor Stinner08654e12010-10-18 12:09:02 +0000367 PySys_FormatStderr("import %s # loaded from Zip %U\n",
368 fullname, modpath);
369 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000370 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000371error:
Victor Stinner26fabe12010-10-18 12:03:25 +0000372 Py_XDECREF(code);
Victor Stinner08654e12010-10-18 12:09:02 +0000373 Py_XDECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000375}
376
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000377/* Return a string matching __file__ for the named module */
378static PyObject *
379zipimporter_get_filename(PyObject *obj, PyObject *args)
380{
381 ZipImporter *self = (ZipImporter *)obj;
382 PyObject *code;
Victor Stinner08654e12010-10-18 12:09:02 +0000383 char *fullname;
384 PyObject *modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000385 int ispackage;
386
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000387 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000388 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000389 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000390
391 /* Deciding the filename requires working out where the code
392 would come from if the module was actually loaded */
393 code = get_module_code(self, fullname, &ispackage, &modpath);
394 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000395 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000396 Py_DECREF(code); /* Only need the path info */
397
Victor Stinner08654e12010-10-18 12:09:02 +0000398 return modpath;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000399}
400
Just van Rossum52e14d62002-12-30 22:08:05 +0000401/* Return a bool signifying whether the module is a package or not. */
402static PyObject *
403zipimporter_is_package(PyObject *obj, PyObject *args)
404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 ZipImporter *self = (ZipImporter *)obj;
406 char *fullname;
407 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
410 &fullname))
411 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 mi = get_module_info(self, fullname);
414 if (mi == MI_ERROR)
415 return NULL;
416 if (mi == MI_NOT_FOUND) {
417 PyErr_Format(ZipImportError, "can't find module '%.200s'",
418 fullname);
419 return NULL;
420 }
421 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000422}
423
424static PyObject *
425zipimporter_get_data(PyObject *obj, PyObject *args)
426{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000428 PyObject *pathobj, *key;
429 const Py_UNICODE *path;
Just van Rossum52e14d62002-12-30 22:08:05 +0000430#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000431 Py_UNICODE *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000432#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000433 Py_UNICODE *archive;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000434 PyObject *toc_entry;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000435 Py_ssize_t path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000436
Victor Stinner60fe8d92010-08-16 23:48:11 +0000437 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000439
Victor Stinner60fe8d92010-08-16 23:48:11 +0000440 path_len = PyUnicode_GET_SIZE(pathobj);
441 path = PyUnicode_AS_UNICODE(pathobj);
Just van Rossum52e14d62002-12-30 22:08:05 +0000442#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000443 if (path_len >= MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 PyErr_SetString(ZipImportError, "path too long");
445 return NULL;
446 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000447 Py_UNICODE_strcpy(buf, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 for (p = buf; *p; p++) {
449 if (*p == ALTSEP)
450 *p = SEP;
451 }
452 path = buf;
Just van Rossum52e14d62002-12-30 22:08:05 +0000453#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000454 archive = PyUnicode_AS_UNICODE(self->archive);
455 len = PyUnicode_GET_SIZE(self->archive);
456 if ((size_t)len < Py_UNICODE_strlen(path) &&
457 Py_UNICODE_strncmp(path, archive, len) == 0 &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 path[len] == SEP) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000459 path += len + 1;
460 path_len -= len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000462
Victor Stinner60fe8d92010-08-16 23:48:11 +0000463 key = PyUnicode_FromUnicode(path, path_len);
464 if (key == NULL)
465 return NULL;
466 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000468 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
469 Py_DECREF(key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 return NULL;
471 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000472 Py_DECREF(key);
473 return get_data(self->archive, toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +0000474}
475
476static PyObject *
477zipimporter_get_code(PyObject *obj, PyObject *args)
478{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 ZipImporter *self = (ZipImporter *)obj;
480 char *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
483 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000486}
487
488static PyObject *
489zipimporter_get_source(PyObject *obj, PyObject *args)
490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 ZipImporter *self = (ZipImporter *)obj;
492 PyObject *toc_entry;
493 char *fullname, *subname, path[MAXPATHLEN+1];
494 int len;
495 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
498 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 mi = get_module_info(self, fullname);
501 if (mi == MI_ERROR)
502 return NULL;
503 if (mi == MI_NOT_FOUND) {
504 PyErr_Format(ZipImportError, "can't find module '%.200s'",
505 fullname);
506 return NULL;
507 }
508 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000509
Victor Stinner269aeb72010-10-18 20:40:59 +0000510 len = make_filename(self->prefix, subname, path, sizeof(path));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 if (len < 0)
512 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000513
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 if (mi == MI_PACKAGE) {
515 path[len] = SEP;
516 strcpy(path + len + 1, "__init__.py");
517 }
518 else
519 strcpy(path + len, ".py");
Just van Rossum52e14d62002-12-30 22:08:05 +0000520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 toc_entry = PyDict_GetItemString(self->files, path);
522 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000523 PyObject *res, *bytes;
524 bytes = get_data(self->archive, toc_entry);
525 if (bytes == NULL)
526 return NULL;
527 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
528 PyBytes_GET_SIZE(bytes));
529 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 return res;
531 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 /* we have the module, but no source */
534 Py_INCREF(Py_None);
535 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000536}
537
538PyDoc_STRVAR(doc_find_module,
539"find_module(fullname, path=None) -> self or None.\n\
540\n\
541Search for a module specified by 'fullname'. 'fullname' must be the\n\
542fully qualified (dotted) module name. It returns the zipimporter\n\
543instance itself if the module was found, or None if it wasn't.\n\
544The optional 'path' argument is ignored -- it's there for compatibility\n\
545with the importer protocol.");
546
547PyDoc_STRVAR(doc_load_module,
548"load_module(fullname) -> module.\n\
549\n\
550Load the module specified by 'fullname'. 'fullname' must be the\n\
551fully qualified (dotted) module name. It returns the imported\n\
552module, or raises ZipImportError if it wasn't found.");
553
554PyDoc_STRVAR(doc_get_data,
555"get_data(pathname) -> string with file data.\n\
556\n\
557Return the data associated with 'pathname'. Raise IOError if\n\
558the file wasn't found.");
559
560PyDoc_STRVAR(doc_is_package,
561"is_package(fullname) -> bool.\n\
562\n\
563Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000564Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000565
566PyDoc_STRVAR(doc_get_code,
567"get_code(fullname) -> code object.\n\
568\n\
569Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000570if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000571
572PyDoc_STRVAR(doc_get_source,
573"get_source(fullname) -> source string.\n\
574\n\
575Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000576if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000577contain the module, but has no source for it.");
578
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000579
580PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000581"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000582\n\
583Return the filename for the specified module.");
584
Just van Rossum52e14d62002-12-30 22:08:05 +0000585static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 {"find_module", zipimporter_find_module, METH_VARARGS,
587 doc_find_module},
588 {"load_module", zipimporter_load_module, METH_VARARGS,
589 doc_load_module},
590 {"get_data", zipimporter_get_data, METH_VARARGS,
591 doc_get_data},
592 {"get_code", zipimporter_get_code, METH_VARARGS,
593 doc_get_code},
594 {"get_source", zipimporter_get_source, METH_VARARGS,
595 doc_get_source},
596 {"get_filename", zipimporter_get_filename, METH_VARARGS,
597 doc_get_filename},
598 {"is_package", zipimporter_is_package, METH_VARARGS,
599 doc_is_package},
600 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000601};
602
603static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
605 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
606 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
607 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000608};
609
610PyDoc_STRVAR(zipimporter_doc,
611"zipimporter(archivepath) -> zipimporter object\n\
612\n\
613Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000614a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
615'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
616valid directory inside the archive.\n\
617\n\
618'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
619archive.\n\
620\n\
621The 'archive' attribute of zipimporter objects contains the name of the\n\
622zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000623
624#define DEFERRED_ADDRESS(ADDR) 0
625
626static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
628 "zipimport.zipimporter",
629 sizeof(ZipImporter),
630 0, /* tp_itemsize */
631 (destructor)zipimporter_dealloc, /* tp_dealloc */
632 0, /* tp_print */
633 0, /* tp_getattr */
634 0, /* tp_setattr */
635 0, /* tp_reserved */
636 (reprfunc)zipimporter_repr, /* tp_repr */
637 0, /* tp_as_number */
638 0, /* tp_as_sequence */
639 0, /* tp_as_mapping */
640 0, /* tp_hash */
641 0, /* tp_call */
642 0, /* tp_str */
643 PyObject_GenericGetAttr, /* tp_getattro */
644 0, /* tp_setattro */
645 0, /* tp_as_buffer */
646 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
647 Py_TPFLAGS_HAVE_GC, /* tp_flags */
648 zipimporter_doc, /* tp_doc */
649 zipimporter_traverse, /* tp_traverse */
650 0, /* tp_clear */
651 0, /* tp_richcompare */
652 0, /* tp_weaklistoffset */
653 0, /* tp_iter */
654 0, /* tp_iternext */
655 zipimporter_methods, /* tp_methods */
656 zipimporter_members, /* tp_members */
657 0, /* tp_getset */
658 0, /* tp_base */
659 0, /* tp_dict */
660 0, /* tp_descr_get */
661 0, /* tp_descr_set */
662 0, /* tp_dictoffset */
663 (initproc)zipimporter_init, /* tp_init */
664 PyType_GenericAlloc, /* tp_alloc */
665 PyType_GenericNew, /* tp_new */
666 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000667};
668
669
670/* implementation */
671
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value. This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no shift ever
   reaches the sign bit, and the sign is applied arithmetically, so the
   result does not depend on the width of long. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* negative value: bits - 2**32 == -(2**32 - 1 - bits) - 1, and
           the masked complement always fits in a long */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
688
689/*
690 read_directory(archive) -> files dict (new reference)
691
692 Given a path to a Zip archive, build a dict, mapping file names
693 (local to the archive, using SEP as a separator) to toc entries.
694
695 A toc_entry is a tuple:
696
Victor Stinner08654e12010-10-18 12:09:02 +0000697 (__file__, # value to use for __file__, available for all files,
698 # encoded to the filesystem encoding
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 compress, # compression kind; 0 for uncompressed
700 data_size, # size of compressed data on disk
701 file_size, # size of decompressed data
702 file_offset, # offset of file header from start of archive
703 time, # mod time of file (in dos format)
     date,          # mod date of file (in dos format)
705 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000706 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000707
708 Directories can be recognized by the trailing SEP in the name,
709 data_size and file_offset are 0.
710*/
711static PyObject *
Victor Stinner2460a432010-08-16 17:54:28 +0000712read_directory(PyObject *archive_obj)
Just van Rossum52e14d62002-12-30 22:08:05 +0000713{
Victor Stinner2460a432010-08-16 17:54:28 +0000714 /* FIXME: work on Py_UNICODE* instead of char* */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 PyObject *files = NULL;
716 FILE *fp;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000717 unsigned short flags;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 long compress, crc, data_size, file_size, file_offset, date, time;
719 long header_offset, name_size, header_size, header_position;
720 long i, l, count;
721 size_t length;
Victor Stinner2460a432010-08-16 17:54:28 +0000722 Py_UNICODE path[MAXPATHLEN + 5];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 char name[MAXPATHLEN + 5];
Victor Stinner2460a432010-08-16 17:54:28 +0000724 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 char *p, endof_central_dir[22];
726 long arc_offset; /* offset from beginning of file to start of zip-archive */
Victor Stinner2460a432010-08-16 17:54:28 +0000727 PyObject *pathobj;
Victor Stinnerd36c8212010-10-18 12:13:46 +0000728 const char *charset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000729
Victor Stinner2460a432010-08-16 17:54:28 +0000730 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 PyErr_SetString(PyExc_OverflowError,
732 "Zip path name is too long");
733 return NULL;
734 }
Victor Stinner2460a432010-08-16 17:54:28 +0000735 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
Just van Rossum52e14d62002-12-30 22:08:05 +0000736
Victor Stinner2460a432010-08-16 17:54:28 +0000737 fp = _Py_fopen(archive_obj, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 if (fp == NULL) {
739 PyErr_Format(ZipImportError, "can't open Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000740 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 return NULL;
742 }
743 fseek(fp, -22, SEEK_END);
744 header_position = ftell(fp);
745 if (fread(endof_central_dir, 1, 22, fp) != 22) {
746 fclose(fp);
747 PyErr_Format(ZipImportError, "can't read Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000748 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 return NULL;
750 }
751 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
752 /* Bad: End of Central Dir signature */
753 fclose(fp);
754 PyErr_Format(ZipImportError, "not a Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000755 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 return NULL;
757 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759 header_size = get_long((unsigned char *)endof_central_dir + 12);
760 header_offset = get_long((unsigned char *)endof_central_dir + 16);
761 arc_offset = header_position - header_offset - header_size;
762 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000764 files = PyDict_New();
765 if (files == NULL)
766 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000767
Victor Stinner2460a432010-08-16 17:54:28 +0000768 length = Py_UNICODE_strlen(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 path[length] = SEP;
Just van Rossum52e14d62002-12-30 22:08:05 +0000770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 /* Start of Central Directory */
772 count = 0;
773 for (;;) {
774 PyObject *t;
775 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 fseek(fp, header_offset, 0); /* Start of file header */
778 l = PyMarshal_ReadLongFromFile(fp);
779 if (l != 0x02014B50)
780 break; /* Bad: Central Dir File Header */
Victor Stinnerd36c8212010-10-18 12:13:46 +0000781 fseek(fp, header_offset + 8, 0);
782 flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 compress = PyMarshal_ReadShortFromFile(fp);
784 time = PyMarshal_ReadShortFromFile(fp);
785 date = PyMarshal_ReadShortFromFile(fp);
786 crc = PyMarshal_ReadLongFromFile(fp);
787 data_size = PyMarshal_ReadLongFromFile(fp);
788 file_size = PyMarshal_ReadLongFromFile(fp);
789 name_size = PyMarshal_ReadShortFromFile(fp);
790 header_size = 46 + name_size +
791 PyMarshal_ReadShortFromFile(fp) +
792 PyMarshal_ReadShortFromFile(fp);
793 fseek(fp, header_offset + 42, 0);
794 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
795 if (name_size > MAXPATHLEN)
796 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 p = name;
799 for (i = 0; i < name_size; i++) {
800 *p = (char)getc(fp);
801 if (*p == '/')
802 *p = SEP;
803 p++;
804 }
805 *p = 0; /* Add terminating null byte */
806 header_offset += header_size;
Just van Rossum52e14d62002-12-30 22:08:05 +0000807
Victor Stinnerd36c8212010-10-18 12:13:46 +0000808 if (flags & 0x0800)
809 charset = "utf-8";
810 else
811 charset = "cp437";
812 nameobj = PyUnicode_Decode(name, name_size, charset, NULL);
Victor Stinner2460a432010-08-16 17:54:28 +0000813 if (nameobj == NULL)
814 goto error;
815 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
Just van Rossum52e14d62002-12-30 22:08:05 +0000816
Victor Stinner2460a432010-08-16 17:54:28 +0000817 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
818 if (pathobj == NULL)
819 goto error;
820 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 file_size, file_offset, time, date, crc);
822 if (t == NULL)
823 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000824 err = PyDict_SetItem(files, nameobj, t);
825 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 Py_DECREF(t);
827 if (err != 0)
828 goto error;
829 count++;
830 }
831 fclose(fp);
832 if (Py_VerboseFlag)
Victor Stinner2460a432010-08-16 17:54:28 +0000833 PySys_FormatStderr("# zipimport: found %ld names in %U\n",
834 count, archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 return files;
Just van Rossum52e14d62002-12-30 22:08:05 +0000836error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 fclose(fp);
838 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +0000839 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000841}
842
843/* Return the zlib.decompress function object, or NULL if zlib couldn't
844 be imported. The function is cached when found, so subsequent calls
845 don't import zlib again. Returns a *borrowed* reference.
846 XXX This makes zlib.decompress immortal. */
847static PyObject *
848get_decompress_func(void)
849{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 static PyObject *decompress = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 if (decompress == NULL) {
853 PyObject *zlib;
854 static int importing_zlib = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000855
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 if (importing_zlib != 0)
857 /* Someone has a zlib.py[co] in their Zip file;
858 let's avoid a stack overflow. */
859 return NULL;
860 importing_zlib = 1;
861 zlib = PyImport_ImportModuleNoBlock("zlib");
862 importing_zlib = 0;
863 if (zlib != NULL) {
864 decompress = PyObject_GetAttrString(zlib,
865 "decompress");
866 Py_DECREF(zlib);
867 }
868 else
869 PyErr_Clear();
870 if (Py_VerboseFlag)
871 PySys_WriteStderr("# zipimport: zlib %s\n",
872 zlib != NULL ? "available": "UNAVAILABLE");
873 }
874 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +0000875}
876
877/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
878 data as a new reference. */
879static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +0000880get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +0000881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 PyObject *raw_data, *data = NULL, *decompress;
883 char *buf;
884 FILE *fp;
885 int err;
886 Py_ssize_t bytes_read = 0;
887 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000888 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 long compress, data_size, file_size, file_offset, bytes_size;
890 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +0000891
Victor Stinner60fe8d92010-08-16 23:48:11 +0000892 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 &data_size, &file_size, &file_offset, &time,
894 &date, &crc)) {
895 return NULL;
896 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000897
Victor Stinner60fe8d92010-08-16 23:48:11 +0000898 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 if (!fp) {
900 PyErr_Format(PyExc_IOError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000901 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 return NULL;
903 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 /* Check to make sure the local file header is correct */
906 fseek(fp, file_offset, 0);
907 l = PyMarshal_ReadLongFromFile(fp);
908 if (l != 0x04034B50) {
909 /* Bad: Local File Header */
910 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000911 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 archive);
913 fclose(fp);
914 return NULL;
915 }
916 fseek(fp, file_offset + 26, 0);
917 l = 30 + PyMarshal_ReadShortFromFile(fp) +
918 PyMarshal_ReadShortFromFile(fp); /* local header size */
919 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +0000920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 bytes_size = compress == 0 ? data_size : data_size + 1;
922 if (bytes_size == 0)
923 bytes_size++;
924 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 if (raw_data == NULL) {
927 fclose(fp);
928 return NULL;
929 }
930 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 err = fseek(fp, file_offset, 0);
933 if (err == 0)
934 bytes_read = fread(buf, 1, data_size, fp);
935 fclose(fp);
936 if (err || bytes_read != data_size) {
937 PyErr_SetString(PyExc_IOError,
938 "zipimport: can't read data");
939 Py_DECREF(raw_data);
940 return NULL;
941 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 if (compress != 0) {
944 buf[data_size] = 'Z'; /* saw this in zipfile.py */
945 data_size++;
946 }
947 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +0000948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 if (compress == 0) { /* data is not compressed */
950 data = PyBytes_FromStringAndSize(buf, data_size);
951 Py_DECREF(raw_data);
952 return data;
953 }
954
955 /* Decompress with zlib */
956 decompress = get_decompress_func();
957 if (decompress == NULL) {
958 PyErr_SetString(ZipImportError,
959 "can't decompress data; "
960 "zlib not available");
961 goto error;
962 }
963 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000964error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 Py_DECREF(raw_data);
966 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +0000967}
968
/* Lenient timestamp comparison.  The mtime kept in the archive has a
   coarser resolution than the one stored in a .pyc, so two values are
   treated as equal when they differ by at most one second.
   Returns 1 when t1 and t2 match under that tolerance, 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    /* dostime only stores even seconds, so be lenient */
    return (delta < 0 ? -delta : delta) <= 1;
}
981
982/* Given the contents of a .py[co] file in a buffer, unmarshal the data
983 and return the code object. Return None if it the magic word doesn't
984 match (we do this instead of raising an exception as we fall back
985 to .py if available and we don't want to mask other errors).
986 Returns a new reference. */
987static PyObject *
988unmarshal_code(char *pathname, PyObject *data, time_t mtime)
989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 PyObject *code;
991 char *buf = PyBytes_AsString(data);
992 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 if (size <= 9) {
995 PyErr_SetString(ZipImportError,
996 "bad pyc data");
997 return NULL;
998 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
1001 if (Py_VerboseFlag)
1002 PySys_WriteStderr("# %s has bad magic\n",
1003 pathname);
1004 Py_INCREF(Py_None);
1005 return Py_None; /* signal caller to try alternative */
1006 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
1009 mtime)) {
1010 if (Py_VerboseFlag)
1011 PySys_WriteStderr("# %s has bad mtime\n",
1012 pathname);
1013 Py_INCREF(Py_None);
1014 return Py_None; /* signal caller to try alternative */
1015 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
1018 if (code == NULL)
1019 return NULL;
1020 if (!PyCode_Check(code)) {
1021 Py_DECREF(code);
1022 PyErr_Format(PyExc_TypeError,
1023 "compiled module %.200s is not a code object",
1024 pathname);
1025 return NULL;
1026 }
1027 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001028}
1029
1030/* Replace any occurances of "\r\n?" in the input string with "\n".
1031 This converts DOS and Mac line endings to Unix line endings.
1032 Also append a trailing "\n" to be compatible with
1033 PyParser_SimpleParseFile(). Returns a new reference. */
1034static PyObject *
1035normalize_line_endings(PyObject *source)
1036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 char *buf, *q, *p = PyBytes_AsString(source);
1038 PyObject *fixed_source;
1039 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 if (!p) {
1042 return PyBytes_FromStringAndSize("\n\0", 2);
1043 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001044
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 /* one char extra for trailing \n and one for terminating \0 */
1046 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1047 if (buf == NULL) {
1048 PyErr_SetString(PyExc_MemoryError,
1049 "zipimport: no memory to allocate "
1050 "source buffer");
1051 return NULL;
1052 }
1053 /* replace "\r\n?" by "\n" */
1054 for (q = buf; *p != '\0'; p++) {
1055 if (*p == '\r') {
1056 *q++ = '\n';
1057 if (*(p + 1) == '\n')
1058 p++;
1059 }
1060 else
1061 *q++ = *p;
1062 len++;
1063 }
1064 *q++ = '\n'; /* add trailing \n */
1065 *q = '\0';
1066 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1067 PyMem_Free(buf);
1068 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001069}
1070
1071/* Given a string buffer containing Python source code, compile it
1072 return and return a code object as a new reference. */
1073static PyObject *
1074compile_source(char *pathname, PyObject *source)
1075{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 PyObject *code, *fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 fixed_source = normalize_line_endings(source);
1079 if (fixed_source == NULL)
1080 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1083 Py_file_input);
1084 Py_DECREF(fixed_source);
1085 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001086}
1087
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs hours (bits 15-11), minutes (bits 10-5) and seconds/2
   (bits 4-0); dosdate packs years since 1980 (bits 15-9), month 1-12
   (bits 8-5) and day of month (bits 4-0).  The broken-down time is
   converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset((void *) &fields, '\0', sizeof(fields));

    /* struct tm counts years from 1900 and months from 0 */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min  = (dostime >> 5) & 0x3f;
    fields.tm_sec  = (dostime & 0x1f) * 2;

    fields.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&fields);
}
1107
1108/* Given a path to a .pyc or .pyo file in the archive, return the
1109 modifictaion time of the matching .py file, or 0 if no source
1110 is available. */
1111static time_t
1112get_mtime_of_source(ZipImporter *self, char *path)
1113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 PyObject *toc_entry;
1115 time_t mtime = 0;
1116 Py_ssize_t lastchar = strlen(path) - 1;
1117 char savechar = path[lastchar];
1118 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1119 toc_entry = PyDict_GetItemString(self->files, path);
1120 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1121 PyTuple_Size(toc_entry) == 8) {
1122 /* fetch the time stamp of the .py file for comparison
1123 with an embedded pyc time stamp */
1124 int time, date;
1125 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1126 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1127 mtime = parse_dostime(time, date);
1128 }
1129 path[lastchar] = savechar;
1130 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001131}
1132
1133/* Return the code object for the module named by 'fullname' from the
1134 Zip archive as a new reference. */
1135static PyObject *
1136get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 PyObject *data, *code;
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001140 PyObject *modpath;
Just van Rossum52e14d62002-12-30 22:08:05 +00001141
Victor Stinner60fe8d92010-08-16 23:48:11 +00001142 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 if (data == NULL)
1144 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001145
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001146 modpath = PyUnicode_EncodeFSDefault(PyTuple_GetItem(toc_entry, 0));
Victor Stinner5a7913e2010-10-16 11:29:07 +00001147 if (modpath == NULL) {
1148 Py_DECREF(data);
1149 return NULL;
1150 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001151
Victor Stinner2a94f4c2010-10-18 12:15:34 +00001152 if (isbytecode)
1153 code = unmarshal_code(PyBytes_AS_STRING(modpath), data, mtime);
1154 else
1155 code = compile_source(PyBytes_AS_STRING(modpath), data);
1156 Py_DECREF(modpath);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 Py_DECREF(data);
1158 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001159}
1160
1161/* Get the code object assoiciated with the module specified by
1162 'fullname'. */
1163static PyObject *
1164get_module_code(ZipImporter *self, char *fullname,
Victor Stinner08654e12010-10-18 12:09:02 +00001165 int *p_ispackage, PyObject **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 PyObject *toc_entry;
1168 char *subname, path[MAXPATHLEN + 1];
1169 int len;
1170 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +00001173
Victor Stinner269aeb72010-10-18 20:40:59 +00001174 len = make_filename(self->prefix, subname, path, sizeof(path));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 if (len < 0)
1176 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 for (zso = zip_searchorder; *zso->suffix; zso++) {
1179 PyObject *code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 strcpy(path + len, zso->suffix);
1182 if (Py_VerboseFlag > 1)
Victor Stinner353349c2010-10-18 11:40:40 +00001183 PySys_FormatStderr("# trying %U%c%s\n",
Victor Stinner72f767e2010-10-18 11:44:21 +00001184 self->archive, (int)SEP, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 toc_entry = PyDict_GetItemString(self->files, path);
1186 if (toc_entry != NULL) {
1187 time_t mtime = 0;
1188 int ispackage = zso->type & IS_PACKAGE;
1189 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 if (isbytecode)
1192 mtime = get_mtime_of_source(self, path);
1193 if (p_ispackage != NULL)
1194 *p_ispackage = ispackage;
1195 code = get_code_from_data(self, ispackage,
1196 isbytecode, mtime,
1197 toc_entry);
1198 if (code == Py_None) {
1199 /* bad magic number or non-matching mtime
1200 in byte code, try next */
1201 Py_DECREF(code);
1202 continue;
1203 }
Victor Stinner08654e12010-10-18 12:09:02 +00001204 if (code != NULL && p_modpath != NULL) {
1205 *p_modpath = PyTuple_GetItem(toc_entry, 0);
1206 Py_INCREF(*p_modpath);
1207 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 return code;
1209 }
1210 }
1211 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1212 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001213}
1214
1215
1216/* Module init */
1217
/* Documentation string for the module; it is listed in the
   zipimportmodule definition that follows. */
1218PyDoc_STRVAR(zipimport_doc,
1219"zipimport provides support for importing Python modules from Zip archives.\n\
1220\n\
1221This module exports three objects:\n\
1222- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001223- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001224 subclass of ImportError, so it can be caught as ImportError, too.\n\
1225- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1226 info dicts, as used in zipimporter._files.\n\
1227\n\
1228It is usually not needed to use the zipimport module explicitly; it is\n\
1229used by the builtin import mechanism for sys.path items that are paths\n\
1230to Zip archives.");
1231
/* Module definition passed to PyModule_Create() in PyInit_zipimport().
   NOTE(review): the per-field labels below follow the documented
   PyModuleDef layout; confirm against the headers this tree builds with. */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001232static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 PyModuleDef_HEAD_INIT,
1234 "zipimport", /* presumably m_name */
1235 zipimport_doc, /* presumably m_doc */
1236 -1, /* presumably m_size */
1237 NULL, /* presumably m_methods: no module-level functions */
1238 NULL,
1239 NULL,
1240 NULL,
1241 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001242};
1243
Just van Rossum52e14d62002-12-30 22:08:05 +00001244PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001245PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001246{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (PyType_Ready(&ZipImporter_Type) < 0)
1250 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 /* Correct directory separator */
1253 zip_searchorder[0].suffix[0] = SEP;
1254 zip_searchorder[1].suffix[0] = SEP;
1255 zip_searchorder[2].suffix[0] = SEP;
1256 if (Py_OptimizeFlag) {
1257 /* Reverse *.pyc and *.pyo */
1258 struct st_zip_searchorder tmp;
1259 tmp = zip_searchorder[0];
1260 zip_searchorder[0] = zip_searchorder[1];
1261 zip_searchorder[1] = tmp;
1262 tmp = zip_searchorder[3];
1263 zip_searchorder[3] = zip_searchorder[4];
1264 zip_searchorder[4] = tmp;
1265 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 mod = PyModule_Create(&zipimportmodule);
1268 if (mod == NULL)
1269 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1272 PyExc_ImportError, NULL);
1273 if (ZipImportError == NULL)
1274 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 Py_INCREF(ZipImportError);
1277 if (PyModule_AddObject(mod, "ZipImportError",
1278 ZipImportError) < 0)
1279 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 Py_INCREF(&ZipImporter_Type);
1282 if (PyModule_AddObject(mod, "zipimporter",
1283 (PyObject *)&ZipImporter_Type) < 0)
1284 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 zip_directory_cache = PyDict_New();
1287 if (zip_directory_cache == NULL)
1288 return NULL;
1289 Py_INCREF(zip_directory_cache);
1290 if (PyModule_AddObject(mod, "_zip_directory_cache",
1291 zip_directory_cache) < 0)
1292 return NULL;
1293 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001294}