blob: 64dbdbc525b188280d62d8c86315ce100a7582c7 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix together with the IS_* flags that
   describe what kind of entry it denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* NUL-terminated suffix, at most 13 characters */
    int type;           /* bitwise OR of IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000030};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
Victor Stinner72f767e2010-10-18 11:44:21 +000039 PyObject *prefix; /* file prefix: "a/sub/directory/",
40 encoded to the filesystem encoding */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000041 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000042};
43
Just van Rossum52e14d62002-12-30 22:08:05 +000044static PyObject *ZipImportError;
Victor Stinnerc342fca2010-10-18 11:39:05 +000045/* read_directory() cache */
Just van Rossum52e14d62002-12-30 22:08:05 +000046static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000049static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000050static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +000051static PyObject *get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000052 int *p_ispackage, char **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000053
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
Victor Stinner2460a432010-08-16 17:54:28 +000065 PyObject *pathobj, *files;
Victor Stinner2b8dab72010-08-14 14:54:10 +000066 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
67 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 if (!_PyArg_NoKeywords("zipimporter()", kwds))
70 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000071
Victor Stinner2b8dab72010-08-14 14:54:10 +000072 if (!PyArg_ParseTuple(args, "O&:zipimporter",
73 PyUnicode_FSDecoder, &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000075
Victor Stinner2b8dab72010-08-14 14:54:10 +000076 /* copy path to buf */
77 len = PyUnicode_GET_SIZE(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 if (len == 0) {
79 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000080 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 }
82 if (len >= MAXPATHLEN) {
83 PyErr_SetString(ZipImportError,
84 "archive path too long");
Victor Stinner2b8dab72010-08-14 14:54:10 +000085 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000086 }
Victor Stinner2b8dab72010-08-14 14:54:10 +000087 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
Just van Rossum52e14d62002-12-30 22:08:05 +000088
89#ifdef ALTSEP
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000090 for (p = buf; *p; p++) {
91 if (*p == ALTSEP)
92 *p = SEP;
93 }
Just van Rossum52e14d62002-12-30 22:08:05 +000094#endif
95
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 path = NULL;
97 prefix = NULL;
98 for (;;) {
99 struct stat statbuf;
100 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +0000101
Victor Stinner2b8dab72010-08-14 14:54:10 +0000102 if (pathobj == NULL) {
103 pathobj = PyUnicode_FromUnicode(buf, len);
104 if (pathobj == NULL)
105 goto error;
106 }
107 rv = _Py_stat(pathobj, &statbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 if (rv == 0) {
109 /* it exists */
110 if (S_ISREG(statbuf.st_mode))
111 /* it's a file */
112 path = buf;
113 break;
114 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000115 else if (PyErr_Occurred())
116 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 /* back up one path element */
Victor Stinner2b8dab72010-08-14 14:54:10 +0000118 p = Py_UNICODE_strrchr(buf, SEP);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000124 len = p - buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 prefix = p;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 Py_CLEAR(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000128 if (path == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000130 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000132
Victor Stinner2b8dab72010-08-14 14:54:10 +0000133 files = PyDict_GetItem(zip_directory_cache, pathobj);
134 if (files == NULL) {
Victor Stinner2460a432010-08-16 17:54:28 +0000135 files = read_directory(pathobj);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000136 if (files == NULL)
137 goto error;
138 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
139 goto error;
140 }
141 else
142 Py_INCREF(files);
143 self->files = files;
144
145 self->archive = pathobj;
146 pathobj = NULL;
147
148 if (prefix != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 prefix++;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000150 len = Py_UNICODE_strlen(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000155 len++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 }
157 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000158 else
159 len = 0;
160 self->prefix = PyUnicode_FromUnicode(prefix, len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 if (self->prefix == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000162 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000165
166error:
167 Py_XDECREF(pathobj);
168 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000169}
170
171/* GC support. */
172static int
173zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 ZipImporter *self = (ZipImporter *)obj;
176 Py_VISIT(self->files);
177 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000178}
179
180static void
181zipimporter_dealloc(ZipImporter *self)
182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 PyObject_GC_UnTrack(self);
184 Py_XDECREF(self->archive);
185 Py_XDECREF(self->prefix);
186 Py_XDECREF(self->files);
187 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000188}
189
190static PyObject *
191zipimporter_repr(ZipImporter *self)
192{
Victor Stinner028dd972010-08-17 00:04:48 +0000193 if (self->archive == NULL)
194 return PyUnicode_FromString("<zipimporter object \"???\">");
195 else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
196 return PyUnicode_FromFormat("<zipimporter object \"%.300U%c%.150U\">",
197 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 else
Victor Stinner028dd972010-08-17 00:04:48 +0000199 return PyUnicode_FromFormat("<zipimporter object \"%.300U\">",
200 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000201}
202
/* Return the part of 'fullname' after its last '.', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'; when
   there is no dot, 'fullname' itself is returned. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
214
215/* Given a (sub)modulename, write the potential file path in the
216 archive (without extension) to the path buffer. Return the
217 length of the resulting string. */
218static int
Victor Stinner72f767e2010-10-18 11:44:21 +0000219make_filename(PyObject *prefix_obj, char *name, char *path)
Just van Rossum52e14d62002-12-30 22:08:05 +0000220{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 size_t len;
222 char *p;
Victor Stinner72f767e2010-10-18 11:44:21 +0000223 PyObject *prefix;
Just van Rossum52e14d62002-12-30 22:08:05 +0000224
Victor Stinner72f767e2010-10-18 11:44:21 +0000225 prefix = PyUnicode_EncodeFSDefault(prefix_obj);
226 if (prefix == NULL)
227 return -1;
228 len = PyBytes_GET_SIZE(prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
231 if (len + strlen(name) + 13 >= MAXPATHLEN) {
232 PyErr_SetString(ZipImportError, "path too long");
Victor Stinner72f767e2010-10-18 11:44:21 +0000233 Py_DECREF(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 return -1;
235 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000236
Victor Stinner72f767e2010-10-18 11:44:21 +0000237 strcpy(path, PyBytes_AS_STRING(prefix));
238 Py_DECREF(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 strcpy(path + len, name);
240 for (p = path + len; *p; p++) {
241 if (*p == '.')
242 *p = SEP;
243 }
244 len += strlen(name);
245 assert(len < INT_MAX);
246 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000247}
248
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* the lookup itself failed */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package */
};
255
256/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000257static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000258get_module_info(ZipImporter *self, char *fullname)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 char *subname, path[MAXPATHLEN + 1];
261 int len;
262 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000265
Victor Stinner72f767e2010-10-18 11:44:21 +0000266 len = make_filename(self->prefix, subname, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 if (len < 0)
268 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 for (zso = zip_searchorder; *zso->suffix; zso++) {
271 strcpy(path + len, zso->suffix);
272 if (PyDict_GetItemString(self->files, path) != NULL) {
273 if (zso->type & IS_PACKAGE)
274 return MI_PACKAGE;
275 else
276 return MI_MODULE;
277 }
278 }
279 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000280}
281
282/* Check whether we can satisfy the import of the module named by
283 'fullname'. Return self if we can, None if we can't. */
284static PyObject *
285zipimporter_find_module(PyObject *obj, PyObject *args)
286{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 ZipImporter *self = (ZipImporter *)obj;
288 PyObject *path = NULL;
289 char *fullname;
290 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293 &fullname, &path))
294 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 mi = get_module_info(self, fullname);
297 if (mi == MI_ERROR)
298 return NULL;
299 if (mi == MI_NOT_FOUND) {
300 Py_INCREF(Py_None);
301 return Py_None;
302 }
303 Py_INCREF(self);
304 return (PyObject *)self;
Just van Rossum52e14d62002-12-30 22:08:05 +0000305}
306
307/* Load and return the module named by 'fullname'. */
308static PyObject *
309zipimporter_load_module(PyObject *obj, PyObject *args)
310{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 ZipImporter *self = (ZipImporter *)obj;
312 PyObject *code, *mod, *dict;
313 char *fullname, *modpath;
314 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317 &fullname))
318 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 code = get_module_code(self, fullname, &ispackage, &modpath);
321 if (code == NULL)
322 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000324 mod = PyImport_AddModule(fullname);
325 if (mod == NULL) {
326 Py_DECREF(code);
327 return NULL;
328 }
329 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 /* mod.__loader__ = self */
332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 if (ispackage) {
336 /* add __path__ to the module *before* the code gets
337 executed */
338 PyObject *pkgpath, *fullpath;
339 char *subname = get_subname(fullname);
340 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000342 fullpath = PyUnicode_FromFormat("%U%c%U%s",
343 self->archive, SEP,
344 self->prefix, subname);
345 if (fullpath == NULL)
346 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 pkgpath = Py_BuildValue("[O]", fullpath);
349 Py_DECREF(fullpath);
350 if (pkgpath == NULL)
351 goto error;
352 err = PyDict_SetItemString(dict, "__path__", pkgpath);
353 Py_DECREF(pkgpath);
354 if (err != 0)
355 goto error;
356 }
357 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
358 Py_DECREF(code);
359 if (Py_VerboseFlag)
360 PySys_WriteStderr("import %s # loaded from Zip %s\n",
361 fullname, modpath);
362 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000363error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 Py_DECREF(code);
365 Py_DECREF(mod);
366 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000367}
368
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000369/* Return a string matching __file__ for the named module */
370static PyObject *
371zipimporter_get_filename(PyObject *obj, PyObject *args)
372{
373 ZipImporter *self = (ZipImporter *)obj;
374 PyObject *code;
375 char *fullname, *modpath;
376 int ispackage;
377
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000378 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000379 &fullname))
Victor Stinnerc342fca2010-10-18 11:39:05 +0000380 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000381
382 /* Deciding the filename requires working out where the code
383 would come from if the module was actually loaded */
384 code = get_module_code(self, fullname, &ispackage, &modpath);
385 if (code == NULL)
Victor Stinnerc342fca2010-10-18 11:39:05 +0000386 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000387 Py_DECREF(code); /* Only need the path info */
388
389 return PyUnicode_FromString(modpath);
390}
391
Just van Rossum52e14d62002-12-30 22:08:05 +0000392/* Return a bool signifying whether the module is a package or not. */
393static PyObject *
394zipimporter_is_package(PyObject *obj, PyObject *args)
395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 ZipImporter *self = (ZipImporter *)obj;
397 char *fullname;
398 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000400 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
401 &fullname))
402 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 mi = get_module_info(self, fullname);
405 if (mi == MI_ERROR)
406 return NULL;
407 if (mi == MI_NOT_FOUND) {
408 PyErr_Format(ZipImportError, "can't find module '%.200s'",
409 fullname);
410 return NULL;
411 }
412 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000413}
414
415static PyObject *
416zipimporter_get_data(PyObject *obj, PyObject *args)
417{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000419 PyObject *pathobj, *key;
420 const Py_UNICODE *path;
Just van Rossum52e14d62002-12-30 22:08:05 +0000421#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000422 Py_UNICODE *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000423#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000424 Py_UNICODE *archive;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 PyObject *toc_entry;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000426 Py_ssize_t path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000427
Victor Stinner60fe8d92010-08-16 23:48:11 +0000428 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000430
Victor Stinner60fe8d92010-08-16 23:48:11 +0000431 path_len = PyUnicode_GET_SIZE(pathobj);
432 path = PyUnicode_AS_UNICODE(pathobj);
Just van Rossum52e14d62002-12-30 22:08:05 +0000433#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000434 if (path_len >= MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 PyErr_SetString(ZipImportError, "path too long");
436 return NULL;
437 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000438 Py_UNICODE_strcpy(buf, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 for (p = buf; *p; p++) {
440 if (*p == ALTSEP)
441 *p = SEP;
442 }
443 path = buf;
Just van Rossum52e14d62002-12-30 22:08:05 +0000444#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000445 archive = PyUnicode_AS_UNICODE(self->archive);
446 len = PyUnicode_GET_SIZE(self->archive);
447 if ((size_t)len < Py_UNICODE_strlen(path) &&
448 Py_UNICODE_strncmp(path, archive, len) == 0 &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 path[len] == SEP) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000450 path += len + 1;
451 path_len -= len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000453
Victor Stinner60fe8d92010-08-16 23:48:11 +0000454 key = PyUnicode_FromUnicode(path, path_len);
455 if (key == NULL)
456 return NULL;
457 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000459 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
460 Py_DECREF(key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 return NULL;
462 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000463 Py_DECREF(key);
464 return get_data(self->archive, toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +0000465}
466
467static PyObject *
468zipimporter_get_code(PyObject *obj, PyObject *args)
469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 ZipImporter *self = (ZipImporter *)obj;
471 char *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
474 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000477}
478
479static PyObject *
480zipimporter_get_source(PyObject *obj, PyObject *args)
481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 ZipImporter *self = (ZipImporter *)obj;
483 PyObject *toc_entry;
484 char *fullname, *subname, path[MAXPATHLEN+1];
485 int len;
486 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
489 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 mi = get_module_info(self, fullname);
492 if (mi == MI_ERROR)
493 return NULL;
494 if (mi == MI_NOT_FOUND) {
495 PyErr_Format(ZipImportError, "can't find module '%.200s'",
496 fullname);
497 return NULL;
498 }
499 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000500
Victor Stinner72f767e2010-10-18 11:44:21 +0000501 len = make_filename(self->prefix, subname, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 if (len < 0)
503 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 if (mi == MI_PACKAGE) {
506 path[len] = SEP;
507 strcpy(path + len + 1, "__init__.py");
508 }
509 else
510 strcpy(path + len, ".py");
Just van Rossum52e14d62002-12-30 22:08:05 +0000511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 toc_entry = PyDict_GetItemString(self->files, path);
513 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000514 PyObject *res, *bytes;
515 bytes = get_data(self->archive, toc_entry);
516 if (bytes == NULL)
517 return NULL;
518 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
519 PyBytes_GET_SIZE(bytes));
520 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 return res;
522 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 /* we have the module, but no source */
525 Py_INCREF(Py_None);
526 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000527}
528
529PyDoc_STRVAR(doc_find_module,
530"find_module(fullname, path=None) -> self or None.\n\
531\n\
532Search for a module specified by 'fullname'. 'fullname' must be the\n\
533fully qualified (dotted) module name. It returns the zipimporter\n\
534instance itself if the module was found, or None if it wasn't.\n\
535The optional 'path' argument is ignored -- it's there for compatibility\n\
536with the importer protocol.");
537
538PyDoc_STRVAR(doc_load_module,
539"load_module(fullname) -> module.\n\
540\n\
541Load the module specified by 'fullname'. 'fullname' must be the\n\
542fully qualified (dotted) module name. It returns the imported\n\
543module, or raises ZipImportError if it wasn't found.");
544
545PyDoc_STRVAR(doc_get_data,
546"get_data(pathname) -> string with file data.\n\
547\n\
548Return the data associated with 'pathname'. Raise IOError if\n\
549the file wasn't found.");
550
551PyDoc_STRVAR(doc_is_package,
552"is_package(fullname) -> bool.\n\
553\n\
554Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000555Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000556
557PyDoc_STRVAR(doc_get_code,
558"get_code(fullname) -> code object.\n\
559\n\
560Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000561if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000562
563PyDoc_STRVAR(doc_get_source,
564"get_source(fullname) -> source string.\n\
565\n\
566Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000567if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000568contain the module, but has no source for it.");
569
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000570
571PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000572"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000573\n\
574Return the filename for the specified module.");
575
Just van Rossum52e14d62002-12-30 22:08:05 +0000576static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 {"find_module", zipimporter_find_module, METH_VARARGS,
578 doc_find_module},
579 {"load_module", zipimporter_load_module, METH_VARARGS,
580 doc_load_module},
581 {"get_data", zipimporter_get_data, METH_VARARGS,
582 doc_get_data},
583 {"get_code", zipimporter_get_code, METH_VARARGS,
584 doc_get_code},
585 {"get_source", zipimporter_get_source, METH_VARARGS,
586 doc_get_source},
587 {"get_filename", zipimporter_get_filename, METH_VARARGS,
588 doc_get_filename},
589 {"is_package", zipimporter_is_package, METH_VARARGS,
590 doc_is_package},
591 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000592};
593
594static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
596 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
597 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
598 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000599};
600
601PyDoc_STRVAR(zipimporter_doc,
602"zipimporter(archivepath) -> zipimporter object\n\
603\n\
604Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000605a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
606'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
607valid directory inside the archive.\n\
608\n\
609'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
610archive.\n\
611\n\
612The 'archive' attribute of zipimporter objects contains the name of the\n\
613zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000614
615#define DEFERRED_ADDRESS(ADDR) 0
616
617static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
619 "zipimport.zipimporter",
620 sizeof(ZipImporter),
621 0, /* tp_itemsize */
622 (destructor)zipimporter_dealloc, /* tp_dealloc */
623 0, /* tp_print */
624 0, /* tp_getattr */
625 0, /* tp_setattr */
626 0, /* tp_reserved */
627 (reprfunc)zipimporter_repr, /* tp_repr */
628 0, /* tp_as_number */
629 0, /* tp_as_sequence */
630 0, /* tp_as_mapping */
631 0, /* tp_hash */
632 0, /* tp_call */
633 0, /* tp_str */
634 PyObject_GenericGetAttr, /* tp_getattro */
635 0, /* tp_setattro */
636 0, /* tp_as_buffer */
637 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
638 Py_TPFLAGS_HAVE_GC, /* tp_flags */
639 zipimporter_doc, /* tp_doc */
640 zipimporter_traverse, /* tp_traverse */
641 0, /* tp_clear */
642 0, /* tp_richcompare */
643 0, /* tp_weaklistoffset */
644 0, /* tp_iter */
645 0, /* tp_iternext */
646 zipimporter_methods, /* tp_methods */
647 zipimporter_members, /* tp_members */
648 0, /* tp_getset */
649 0, /* tp_base */
650 0, /* tp_dict */
651 0, /* tp_descr_get */
652 0, /* tp_descr_set */
653 0, /* tp_dictoffset */
654 (initproc)zipimporter_init, /* tp_init */
655 PyType_GenericAlloc, /* tp_alloc */
656 PyType_GenericNew, /* tp_new */
657 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000658};
659
660
661/* implementation */
662
/* Decode the first 4 bytes of 'buf' as a little-endian integer and
   return it as a long.  Where long is wider than 32 bits the value is
   sign-extended from bit 31.  This partially reimplements
   marshal.c:r_long() */
static long
get_long(unsigned char *buf) {
    long value = (long)buf[0]
               | ((long)buf[1] << 8)
               | ((long)buf[2] << 16)
               | ((long)buf[3] << 24);
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
679
680/*
681 read_directory(archive) -> files dict (new reference)
682
683 Given a path to a Zip archive, build a dict, mapping file names
684 (local to the archive, using SEP as a separator) to toc entries.
685
686 A toc_entry is a tuple:
687
Victor Stinnerc342fca2010-10-18 11:39:05 +0000688 (__file__, # value to use for __file__, available for all files
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 compress, # compression kind; 0 for uncompressed
690 data_size, # size of compressed data on disk
691 file_size, # size of decompressed data
692 file_offset, # offset of file header from start of archive
693 time, # mod time of file (in dos format)
694 date, # mod data of file (in dos format)
695 crc, # crc checksum of the data
Victor Stinnerc342fca2010-10-18 11:39:05 +0000696 )
Just van Rossum52e14d62002-12-30 22:08:05 +0000697
698 Directories can be recognized by the trailing SEP in the name,
699 data_size and file_offset are 0.
700*/
701static PyObject *
Victor Stinner2460a432010-08-16 17:54:28 +0000702read_directory(PyObject *archive_obj)
Just van Rossum52e14d62002-12-30 22:08:05 +0000703{
Victor Stinner2460a432010-08-16 17:54:28 +0000704 /* FIXME: work on Py_UNICODE* instead of char* */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 PyObject *files = NULL;
706 FILE *fp;
707 long compress, crc, data_size, file_size, file_offset, date, time;
708 long header_offset, name_size, header_size, header_position;
709 long i, l, count;
710 size_t length;
Victor Stinner2460a432010-08-16 17:54:28 +0000711 Py_UNICODE path[MAXPATHLEN + 5];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 char name[MAXPATHLEN + 5];
Victor Stinner2460a432010-08-16 17:54:28 +0000713 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 char *p, endof_central_dir[22];
715 long arc_offset; /* offset from beginning of file to start of zip-archive */
Victor Stinner2460a432010-08-16 17:54:28 +0000716 PyObject *pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000717
Victor Stinner2460a432010-08-16 17:54:28 +0000718 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 PyErr_SetString(PyExc_OverflowError,
720 "Zip path name is too long");
721 return NULL;
722 }
Victor Stinner2460a432010-08-16 17:54:28 +0000723 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
Just van Rossum52e14d62002-12-30 22:08:05 +0000724
Victor Stinner2460a432010-08-16 17:54:28 +0000725 fp = _Py_fopen(archive_obj, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 if (fp == NULL) {
727 PyErr_Format(ZipImportError, "can't open Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000728 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 return NULL;
730 }
731 fseek(fp, -22, SEEK_END);
732 header_position = ftell(fp);
733 if (fread(endof_central_dir, 1, 22, fp) != 22) {
734 fclose(fp);
735 PyErr_Format(ZipImportError, "can't read Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000736 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 return NULL;
738 }
739 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
740 /* Bad: End of Central Dir signature */
741 fclose(fp);
742 PyErr_Format(ZipImportError, "not a Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000743 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 return NULL;
745 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 header_size = get_long((unsigned char *)endof_central_dir + 12);
748 header_offset = get_long((unsigned char *)endof_central_dir + 16);
749 arc_offset = header_position - header_offset - header_size;
750 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 files = PyDict_New();
753 if (files == NULL)
754 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000755
Victor Stinner2460a432010-08-16 17:54:28 +0000756 length = Py_UNICODE_strlen(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 path[length] = SEP;
Just van Rossum52e14d62002-12-30 22:08:05 +0000758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759 /* Start of Central Directory */
760 count = 0;
761 for (;;) {
762 PyObject *t;
763 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000764
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 fseek(fp, header_offset, 0); /* Start of file header */
766 l = PyMarshal_ReadLongFromFile(fp);
767 if (l != 0x02014B50)
768 break; /* Bad: Central Dir File Header */
769 fseek(fp, header_offset + 10, 0);
770 compress = PyMarshal_ReadShortFromFile(fp);
771 time = PyMarshal_ReadShortFromFile(fp);
772 date = PyMarshal_ReadShortFromFile(fp);
773 crc = PyMarshal_ReadLongFromFile(fp);
774 data_size = PyMarshal_ReadLongFromFile(fp);
775 file_size = PyMarshal_ReadLongFromFile(fp);
776 name_size = PyMarshal_ReadShortFromFile(fp);
777 header_size = 46 + name_size +
778 PyMarshal_ReadShortFromFile(fp) +
779 PyMarshal_ReadShortFromFile(fp);
780 fseek(fp, header_offset + 42, 0);
781 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
782 if (name_size > MAXPATHLEN)
783 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 p = name;
786 for (i = 0; i < name_size; i++) {
787 *p = (char)getc(fp);
788 if (*p == '/')
789 *p = SEP;
790 p++;
791 }
792 *p = 0; /* Add terminating null byte */
793 header_offset += header_size;
Just van Rossum52e14d62002-12-30 22:08:05 +0000794
Victor Stinner2460a432010-08-16 17:54:28 +0000795 nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size);
796 if (nameobj == NULL)
797 goto error;
798 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
Just van Rossum52e14d62002-12-30 22:08:05 +0000799
Victor Stinner2460a432010-08-16 17:54:28 +0000800 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
801 if (pathobj == NULL)
802 goto error;
803 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000804 file_size, file_offset, time, date, crc);
805 if (t == NULL)
806 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000807 err = PyDict_SetItem(files, nameobj, t);
808 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 Py_DECREF(t);
810 if (err != 0)
811 goto error;
812 count++;
813 }
814 fclose(fp);
815 if (Py_VerboseFlag)
Victor Stinner2460a432010-08-16 17:54:28 +0000816 PySys_FormatStderr("# zipimport: found %ld names in %U\n",
817 count, archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 return files;
Just van Rossum52e14d62002-12-30 22:08:05 +0000819error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000820 fclose(fp);
821 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +0000822 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000824}
825
826/* Return the zlib.decompress function object, or NULL if zlib couldn't
827 be imported. The function is cached when found, so subsequent calls
828 don't import zlib again. Returns a *borrowed* reference.
829 XXX This makes zlib.decompress immortal. */
830static PyObject *
831get_decompress_func(void)
832{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 static PyObject *decompress = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 if (decompress == NULL) {
836 PyObject *zlib;
837 static int importing_zlib = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000839 if (importing_zlib != 0)
840 /* Someone has a zlib.py[co] in their Zip file;
841 let's avoid a stack overflow. */
842 return NULL;
843 importing_zlib = 1;
844 zlib = PyImport_ImportModuleNoBlock("zlib");
845 importing_zlib = 0;
846 if (zlib != NULL) {
847 decompress = PyObject_GetAttrString(zlib,
848 "decompress");
849 Py_DECREF(zlib);
850 }
851 else
852 PyErr_Clear();
853 if (Py_VerboseFlag)
854 PySys_WriteStderr("# zipimport: zlib %s\n",
855 zlib != NULL ? "available": "UNAVAILABLE");
856 }
857 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +0000858}
859
860/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
861 data as a new reference. */
862static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +0000863get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +0000864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 PyObject *raw_data, *data = NULL, *decompress;
866 char *buf;
867 FILE *fp;
868 int err;
869 Py_ssize_t bytes_read = 0;
870 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000871 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 long compress, data_size, file_size, file_offset, bytes_size;
873 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +0000874
Victor Stinner60fe8d92010-08-16 23:48:11 +0000875 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 &data_size, &file_size, &file_offset, &time,
877 &date, &crc)) {
878 return NULL;
879 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000880
Victor Stinner60fe8d92010-08-16 23:48:11 +0000881 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 if (!fp) {
883 PyErr_Format(PyExc_IOError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000884 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 return NULL;
886 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 /* Check to make sure the local file header is correct */
889 fseek(fp, file_offset, 0);
890 l = PyMarshal_ReadLongFromFile(fp);
891 if (l != 0x04034B50) {
892 /* Bad: Local File Header */
893 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000894 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 archive);
896 fclose(fp);
897 return NULL;
898 }
899 fseek(fp, file_offset + 26, 0);
900 l = 30 + PyMarshal_ReadShortFromFile(fp) +
901 PyMarshal_ReadShortFromFile(fp); /* local header size */
902 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +0000903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 bytes_size = compress == 0 ? data_size : data_size + 1;
905 if (bytes_size == 0)
906 bytes_size++;
907 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 if (raw_data == NULL) {
910 fclose(fp);
911 return NULL;
912 }
913 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 err = fseek(fp, file_offset, 0);
916 if (err == 0)
917 bytes_read = fread(buf, 1, data_size, fp);
918 fclose(fp);
919 if (err || bytes_read != data_size) {
920 PyErr_SetString(PyExc_IOError,
921 "zipimport: can't read data");
922 Py_DECREF(raw_data);
923 return NULL;
924 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 if (compress != 0) {
927 buf[data_size] = 'Z'; /* saw this in zipfile.py */
928 data_size++;
929 }
930 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 if (compress == 0) { /* data is not compressed */
933 data = PyBytes_FromStringAndSize(buf, data_size);
934 Py_DECREF(raw_data);
935 return data;
936 }
937
938 /* Decompress with zlib */
939 decompress = get_decompress_func();
940 if (decompress == NULL) {
941 PyErr_SetString(ZipImportError,
942 "can't decompress data; "
943 "zlib not available");
944 goto error;
945 }
946 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000947error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 Py_DECREF(raw_data);
949 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +0000950}
951
/* Lenient date/time comparison function.  The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two time stamps are treated as equal
   when they differ by at most one second.

   Returns 1 when t1 and t2 are within one second of each other,
   0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    /* Subtract the smaller from the larger instead of negating a
       possibly negative difference: that also gives the right answer
       on platforms where time_t is an unsigned type. */
    time_t d = (t1 > t2) ? (t1 - t2) : (t2 - t1);

    return d <= 1;
}
964
965/* Given the contents of a .py[co] file in a buffer, unmarshal the data
966 and return the code object. Return None if it the magic word doesn't
967 match (we do this instead of raising an exception as we fall back
968 to .py if available and we don't want to mask other errors).
969 Returns a new reference. */
970static PyObject *
971unmarshal_code(char *pathname, PyObject *data, time_t mtime)
972{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 PyObject *code;
974 char *buf = PyBytes_AsString(data);
975 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 if (size <= 9) {
978 PyErr_SetString(ZipImportError,
979 "bad pyc data");
980 return NULL;
981 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
984 if (Py_VerboseFlag)
985 PySys_WriteStderr("# %s has bad magic\n",
986 pathname);
987 Py_INCREF(Py_None);
988 return Py_None; /* signal caller to try alternative */
989 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
992 mtime)) {
993 if (Py_VerboseFlag)
994 PySys_WriteStderr("# %s has bad mtime\n",
995 pathname);
996 Py_INCREF(Py_None);
997 return Py_None; /* signal caller to try alternative */
998 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
1001 if (code == NULL)
1002 return NULL;
1003 if (!PyCode_Check(code)) {
1004 Py_DECREF(code);
1005 PyErr_Format(PyExc_TypeError,
1006 "compiled module %.200s is not a code object",
1007 pathname);
1008 return NULL;
1009 }
1010 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001011}
1012
1013/* Replace any occurances of "\r\n?" in the input string with "\n".
1014 This converts DOS and Mac line endings to Unix line endings.
1015 Also append a trailing "\n" to be compatible with
1016 PyParser_SimpleParseFile(). Returns a new reference. */
1017static PyObject *
1018normalize_line_endings(PyObject *source)
1019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 char *buf, *q, *p = PyBytes_AsString(source);
1021 PyObject *fixed_source;
1022 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 if (!p) {
1025 return PyBytes_FromStringAndSize("\n\0", 2);
1026 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 /* one char extra for trailing \n and one for terminating \0 */
1029 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1030 if (buf == NULL) {
1031 PyErr_SetString(PyExc_MemoryError,
1032 "zipimport: no memory to allocate "
1033 "source buffer");
1034 return NULL;
1035 }
1036 /* replace "\r\n?" by "\n" */
1037 for (q = buf; *p != '\0'; p++) {
1038 if (*p == '\r') {
1039 *q++ = '\n';
1040 if (*(p + 1) == '\n')
1041 p++;
1042 }
1043 else
1044 *q++ = *p;
1045 len++;
1046 }
1047 *q++ = '\n'; /* add trailing \n */
1048 *q = '\0';
1049 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1050 PyMem_Free(buf);
1051 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001052}
1053
1054/* Given a string buffer containing Python source code, compile it
1055 return and return a code object as a new reference. */
1056static PyObject *
1057compile_source(char *pathname, PyObject *source)
1058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 PyObject *code, *fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 fixed_source = normalize_line_endings(source);
1062 if (fixed_source == NULL)
1063 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1066 Py_file_input);
1067 Py_DECREF(fixed_source);
1068 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001069}
1070
/* Convert the packed date/time values found in the Zip archive to a
   time_t that can be compared with the time stamp stored in .pyc
   files.

   dostime: bits 0-4 hold seconds / 2, bits 5-10 minutes, bits 11-15
            hours.
   dosdate: bits 0-4 hold the day of the month, bits 5-8 the month
            (1-based), bits 9-15 the year counted from 1980.

   The fields are interpreted by mktime(), i.e. as local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    memset((void *) &fields, '\0', sizeof(fields));

    /* date part */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;  /* tm_year is 1900-based */
    fields.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;   /* tm_mon is 0-based */
    fields.tm_mday = dosdate & 0x1f;

    /* time part */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min  = (dostime >> 5) & 0x3f;
    fields.tm_sec  = (dostime & 0x1f) * 2;

    /* let mktime() work out DST; wday/yday is ignored */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1090
1091/* Given a path to a .pyc or .pyo file in the archive, return the
1092 modifictaion time of the matching .py file, or 0 if no source
1093 is available. */
1094static time_t
1095get_mtime_of_source(ZipImporter *self, char *path)
1096{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 PyObject *toc_entry;
1098 time_t mtime = 0;
1099 Py_ssize_t lastchar = strlen(path) - 1;
1100 char savechar = path[lastchar];
1101 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1102 toc_entry = PyDict_GetItemString(self->files, path);
1103 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1104 PyTuple_Size(toc_entry) == 8) {
1105 /* fetch the time stamp of the .py file for comparison
1106 with an embedded pyc time stamp */
1107 int time, date;
1108 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1109 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1110 mtime = parse_dostime(time, date);
1111 }
1112 path[lastchar] = savechar;
1113 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001114}
1115
1116/* Return the code object for the module named by 'fullname' from the
1117 Zip archive as a new reference. */
1118static PyObject *
1119get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001121{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 PyObject *data, *code;
1123 char *modpath;
Just van Rossum52e14d62002-12-30 22:08:05 +00001124
Victor Stinner60fe8d92010-08-16 23:48:11 +00001125 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 if (data == NULL)
1127 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
Victor Stinner5a7913e2010-10-16 11:29:07 +00001130 if (modpath == NULL) {
1131 Py_DECREF(data);
1132 return NULL;
1133 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (isbytecode) {
1136 code = unmarshal_code(modpath, data, mtime);
1137 }
1138 else {
1139 code = compile_source(modpath, data);
1140 }
1141 Py_DECREF(data);
1142 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001143}
1144
1145/* Get the code object assoiciated with the module specified by
1146 'fullname'. */
1147static PyObject *
1148get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 int *p_ispackage, char **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001150{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 PyObject *toc_entry;
1152 char *subname, path[MAXPATHLEN + 1];
1153 int len;
1154 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +00001157
Victor Stinner72f767e2010-10-18 11:44:21 +00001158 len = make_filename(self->prefix, subname, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (len < 0)
1160 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 for (zso = zip_searchorder; *zso->suffix; zso++) {
1163 PyObject *code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 strcpy(path + len, zso->suffix);
1166 if (Py_VerboseFlag > 1)
Victor Stinner353349c2010-10-18 11:40:40 +00001167 PySys_FormatStderr("# trying %U%c%s\n",
Victor Stinner72f767e2010-10-18 11:44:21 +00001168 self->archive, (int)SEP, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 toc_entry = PyDict_GetItemString(self->files, path);
1170 if (toc_entry != NULL) {
1171 time_t mtime = 0;
1172 int ispackage = zso->type & IS_PACKAGE;
1173 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 if (isbytecode)
1176 mtime = get_mtime_of_source(self, path);
1177 if (p_ispackage != NULL)
1178 *p_ispackage = ispackage;
1179 code = get_code_from_data(self, ispackage,
1180 isbytecode, mtime,
1181 toc_entry);
1182 if (code == Py_None) {
1183 /* bad magic number or non-matching mtime
1184 in byte code, try next */
1185 Py_DECREF(code);
1186 continue;
1187 }
1188 if (code != NULL && p_modpath != NULL)
1189 *p_modpath = _PyUnicode_AsString(
1190 PyTuple_GetItem(toc_entry, 0));
1191 return code;
1192 }
1193 }
1194 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1195 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001196}
1197
1198
1199/* Module init */
1200
1201PyDoc_STRVAR(zipimport_doc,
1202"zipimport provides support for importing Python modules from Zip archives.\n\
1203\n\
1204This module exports three objects:\n\
1205- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001206- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001207 subclass of ImportError, so it can be caught as ImportError, too.\n\
1208- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1209 info dicts, as used in zipimporter._files.\n\
1210\n\
1211It is usually not needed to use the zipimport module explicitly; it is\n\
1212used by the builtin import mechanism for sys.path items that are paths\n\
1213to Zip archives.");
1214
Martin v. Löwis1a214512008-06-11 05:26:20 +00001215static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 PyModuleDef_HEAD_INIT,
1217 "zipimport",
1218 zipimport_doc,
1219 -1,
1220 NULL,
1221 NULL,
1222 NULL,
1223 NULL,
1224 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001225};
1226
Just van Rossum52e14d62002-12-30 22:08:05 +00001227PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001228PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 if (PyType_Ready(&ZipImporter_Type) < 0)
1233 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 /* Correct directory separator */
1236 zip_searchorder[0].suffix[0] = SEP;
1237 zip_searchorder[1].suffix[0] = SEP;
1238 zip_searchorder[2].suffix[0] = SEP;
1239 if (Py_OptimizeFlag) {
1240 /* Reverse *.pyc and *.pyo */
1241 struct st_zip_searchorder tmp;
1242 tmp = zip_searchorder[0];
1243 zip_searchorder[0] = zip_searchorder[1];
1244 zip_searchorder[1] = tmp;
1245 tmp = zip_searchorder[3];
1246 zip_searchorder[3] = zip_searchorder[4];
1247 zip_searchorder[4] = tmp;
1248 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 mod = PyModule_Create(&zipimportmodule);
1251 if (mod == NULL)
1252 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1255 PyExc_ImportError, NULL);
1256 if (ZipImportError == NULL)
1257 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 Py_INCREF(ZipImportError);
1260 if (PyModule_AddObject(mod, "ZipImportError",
1261 ZipImportError) < 0)
1262 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 Py_INCREF(&ZipImporter_Type);
1265 if (PyModule_AddObject(mod, "zipimporter",
1266 (PyObject *)&ZipImporter_Type) < 0)
1267 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 zip_directory_cache = PyDict_New();
1270 if (zip_directory_cache == NULL)
1271 return NULL;
1272 Py_INCREF(zip_directory_cache);
1273 if (PyModule_AddObject(mod, "_zip_directory_cache",
1274 zip_directory_cache) < 0)
1275 return NULL;
1276 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001277}