blob: a1ee70b765f2346f2bff8b3c599d182e323a2fde [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags used in the 'type' field of zip_searchorder entries. */
#define IS_SOURCE   0x0     /* source file: no flag bits set */
#define IS_BYTECODE 0x1     /* compiled bytecode entry */
#define IS_PACKAGE  0x2     /* package __init__ entry */
11
/* One row of the module search table: a file-name suffix together
   with the IS_* flag bits describing the entry that suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* holds "/__init__.pyc" plus the trailing NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000030};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000041};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000047static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000048static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +000049static PyObject *get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 int *p_ispackage, char **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000051
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
Victor Stinner2460a432010-08-16 17:54:28 +000063 PyObject *pathobj, *files;
Victor Stinner2b8dab72010-08-14 14:54:10 +000064 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
65 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000069
Victor Stinner2b8dab72010-08-14 14:54:10 +000070 if (!PyArg_ParseTuple(args, "O&:zipimporter",
71 PyUnicode_FSDecoder, &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000073
Victor Stinner2b8dab72010-08-14 14:54:10 +000074 /* copy path to buf */
75 len = PyUnicode_GET_SIZE(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 if (len == 0) {
77 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000078 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 }
80 if (len >= MAXPATHLEN) {
81 PyErr_SetString(ZipImportError,
82 "archive path too long");
Victor Stinner2b8dab72010-08-14 14:54:10 +000083 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 }
Victor Stinner2b8dab72010-08-14 14:54:10 +000085 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
Just van Rossum52e14d62002-12-30 22:08:05 +000086
87#ifdef ALTSEP
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 for (p = buf; *p; p++) {
89 if (*p == ALTSEP)
90 *p = SEP;
91 }
Just van Rossum52e14d62002-12-30 22:08:05 +000092#endif
93
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 path = NULL;
95 prefix = NULL;
96 for (;;) {
97 struct stat statbuf;
98 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +000099
Victor Stinner2b8dab72010-08-14 14:54:10 +0000100 if (pathobj == NULL) {
101 pathobj = PyUnicode_FromUnicode(buf, len);
102 if (pathobj == NULL)
103 goto error;
104 }
105 rv = _Py_stat(pathobj, &statbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 if (rv == 0) {
107 /* it exists */
108 if (S_ISREG(statbuf.st_mode))
109 /* it's a file */
110 path = buf;
111 break;
112 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000113 else if (PyErr_Occurred())
114 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 /* back up one path element */
Victor Stinner2b8dab72010-08-14 14:54:10 +0000116 p = Py_UNICODE_strrchr(buf, SEP);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 if (prefix != NULL)
118 *prefix = SEP;
119 if (p == NULL)
120 break;
121 *p = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000122 len = p - buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 prefix = p;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000124 Py_CLEAR(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 if (path == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000128 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000130
Victor Stinner2b8dab72010-08-14 14:54:10 +0000131 files = PyDict_GetItem(zip_directory_cache, pathobj);
132 if (files == NULL) {
Victor Stinner2460a432010-08-16 17:54:28 +0000133 files = read_directory(pathobj);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000134 if (files == NULL)
135 goto error;
136 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
137 goto error;
138 }
139 else
140 Py_INCREF(files);
141 self->files = files;
142
143 self->archive = pathobj;
144 pathobj = NULL;
145
146 if (prefix != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 prefix++;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000148 len = Py_UNICODE_strlen(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 if (prefix[len-1] != SEP) {
150 /* add trailing SEP */
151 prefix[len] = SEP;
152 prefix[len + 1] = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000153 len++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 }
155 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000156 else
157 len = 0;
158 self->prefix = PyUnicode_FromUnicode(prefix, len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 if (self->prefix == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000160 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000163
164error:
165 Py_XDECREF(pathobj);
166 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 ZipImporter *self = (ZipImporter *)obj;
174 Py_VISIT(self->files);
175 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000176}
177
178static void
179zipimporter_dealloc(ZipImporter *self)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 PyObject_GC_UnTrack(self);
182 Py_XDECREF(self->archive);
183 Py_XDECREF(self->prefix);
184 Py_XDECREF(self->files);
185 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000186}
187
188static PyObject *
189zipimporter_repr(ZipImporter *self)
190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 char *archive = "???";
192 char *prefix = "";
Just van Rossum52e14d62002-12-30 22:08:05 +0000193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (self->archive != NULL && PyUnicode_Check(self->archive))
195 archive = _PyUnicode_AsString(self->archive);
196 if (self->prefix != NULL && PyUnicode_Check(self->prefix))
197 prefix = _PyUnicode_AsString(self->prefix);
198 if (prefix != NULL && *prefix)
199 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
200 archive, SEP, prefix);
201 else
202 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
203 archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000204}
205
/* Return the last dotted component of 'fullname', i.e. the Python
   expression fullname.split(".")[-1].  The result points into
   'fullname' itself; nothing is allocated. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
217
218/* Given a (sub)modulename, write the potential file path in the
219 archive (without extension) to the path buffer. Return the
220 length of the resulting string. */
221static int
222make_filename(char *prefix, char *name, char *path)
223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 size_t len;
225 char *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 len = strlen(prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000228
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
230 if (len + strlen(name) + 13 >= MAXPATHLEN) {
231 PyErr_SetString(ZipImportError, "path too long");
232 return -1;
233 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 strcpy(path, prefix);
236 strcpy(path + len, name);
237 for (p = path + len; *p; p++) {
238 if (*p == '.')
239 *p = SEP;
240 }
241 len += strlen(name);
242 assert(len < INT_MAX);
243 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000244}
245
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed; an exception is set */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found as a plain module */
    MI_PACKAGE = 3      /* found as a package (__init__ entry) */
};
252
253/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000254static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000255get_module_info(ZipImporter *self, char *fullname)
256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 char *subname, path[MAXPATHLEN + 1];
258 int len;
259 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
264 if (len < 0)
265 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 for (zso = zip_searchorder; *zso->suffix; zso++) {
268 strcpy(path + len, zso->suffix);
269 if (PyDict_GetItemString(self->files, path) != NULL) {
270 if (zso->type & IS_PACKAGE)
271 return MI_PACKAGE;
272 else
273 return MI_MODULE;
274 }
275 }
276 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000277}
278
279/* Check whether we can satisfy the import of the module named by
280 'fullname'. Return self if we can, None if we can't. */
281static PyObject *
282zipimporter_find_module(PyObject *obj, PyObject *args)
283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 ZipImporter *self = (ZipImporter *)obj;
285 PyObject *path = NULL;
286 char *fullname;
287 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
290 &fullname, &path))
291 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000292
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 mi = get_module_info(self, fullname);
294 if (mi == MI_ERROR)
295 return NULL;
296 if (mi == MI_NOT_FOUND) {
297 Py_INCREF(Py_None);
298 return Py_None;
299 }
300 Py_INCREF(self);
301 return (PyObject *)self;
Just van Rossum52e14d62002-12-30 22:08:05 +0000302}
303
304/* Load and return the module named by 'fullname'. */
305static PyObject *
306zipimporter_load_module(PyObject *obj, PyObject *args)
307{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 ZipImporter *self = (ZipImporter *)obj;
309 PyObject *code, *mod, *dict;
310 char *fullname, *modpath;
311 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
314 &fullname))
315 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 code = get_module_code(self, fullname, &ispackage, &modpath);
318 if (code == NULL)
319 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 mod = PyImport_AddModule(fullname);
322 if (mod == NULL) {
323 Py_DECREF(code);
324 return NULL;
325 }
326 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 /* mod.__loader__ = self */
329 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
330 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000331
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 if (ispackage) {
333 /* add __path__ to the module *before* the code gets
334 executed */
335 PyObject *pkgpath, *fullpath;
336 char *subname = get_subname(fullname);
337 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 fullpath = PyUnicode_FromFormat("%U%c%U%s",
340 self->archive, SEP,
341 self->prefix, subname);
342 if (fullpath == NULL)
343 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 pkgpath = Py_BuildValue("[O]", fullpath);
346 Py_DECREF(fullpath);
347 if (pkgpath == NULL)
348 goto error;
349 err = PyDict_SetItemString(dict, "__path__", pkgpath);
350 Py_DECREF(pkgpath);
351 if (err != 0)
352 goto error;
353 }
354 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355 Py_DECREF(code);
356 if (Py_VerboseFlag)
357 PySys_WriteStderr("import %s # loaded from Zip %s\n",
358 fullname, modpath);
359 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000360error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 Py_DECREF(code);
362 Py_DECREF(mod);
363 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000364}
365
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000366/* Return a string matching __file__ for the named module */
367static PyObject *
368zipimporter_get_filename(PyObject *obj, PyObject *args)
369{
370 ZipImporter *self = (ZipImporter *)obj;
371 PyObject *code;
372 char *fullname, *modpath;
373 int ispackage;
374
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000375 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000376 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000378
379 /* Deciding the filename requires working out where the code
380 would come from if the module was actually loaded */
381 code = get_module_code(self, fullname, &ispackage, &modpath);
382 if (code == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000384 Py_DECREF(code); /* Only need the path info */
385
386 return PyUnicode_FromString(modpath);
387}
388
Just van Rossum52e14d62002-12-30 22:08:05 +0000389/* Return a bool signifying whether the module is a package or not. */
390static PyObject *
391zipimporter_is_package(PyObject *obj, PyObject *args)
392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 ZipImporter *self = (ZipImporter *)obj;
394 char *fullname;
395 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
398 &fullname))
399 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 mi = get_module_info(self, fullname);
402 if (mi == MI_ERROR)
403 return NULL;
404 if (mi == MI_NOT_FOUND) {
405 PyErr_Format(ZipImportError, "can't find module '%.200s'",
406 fullname);
407 return NULL;
408 }
409 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000410}
411
412static PyObject *
413zipimporter_get_data(PyObject *obj, PyObject *args)
414{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000416 PyObject *pathobj, *key;
417 const Py_UNICODE *path;
Just van Rossum52e14d62002-12-30 22:08:05 +0000418#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000419 Py_UNICODE *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000420#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000421 Py_UNICODE *archive;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 PyObject *toc_entry;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000423 Py_ssize_t path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000424
Victor Stinner60fe8d92010-08-16 23:48:11 +0000425 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000427
Victor Stinner60fe8d92010-08-16 23:48:11 +0000428 path_len = PyUnicode_GET_SIZE(pathobj);
429 path = PyUnicode_AS_UNICODE(pathobj);
Just van Rossum52e14d62002-12-30 22:08:05 +0000430#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000431 if (path_len >= MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 PyErr_SetString(ZipImportError, "path too long");
433 return NULL;
434 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000435 Py_UNICODE_strcpy(buf, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 for (p = buf; *p; p++) {
437 if (*p == ALTSEP)
438 *p = SEP;
439 }
440 path = buf;
Just van Rossum52e14d62002-12-30 22:08:05 +0000441#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000442 archive = PyUnicode_AS_UNICODE(self->archive);
443 len = PyUnicode_GET_SIZE(self->archive);
444 if ((size_t)len < Py_UNICODE_strlen(path) &&
445 Py_UNICODE_strncmp(path, archive, len) == 0 &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 path[len] == SEP) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000447 path += len + 1;
448 path_len -= len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000450
Victor Stinner60fe8d92010-08-16 23:48:11 +0000451 key = PyUnicode_FromUnicode(path, path_len);
452 if (key == NULL)
453 return NULL;
454 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000456 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
457 Py_DECREF(key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 return NULL;
459 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000460 Py_DECREF(key);
461 return get_data(self->archive, toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +0000462}
463
464static PyObject *
465zipimporter_get_code(PyObject *obj, PyObject *args)
466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 ZipImporter *self = (ZipImporter *)obj;
468 char *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000469
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
471 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000474}
475
476static PyObject *
477zipimporter_get_source(PyObject *obj, PyObject *args)
478{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 ZipImporter *self = (ZipImporter *)obj;
480 PyObject *toc_entry;
481 char *fullname, *subname, path[MAXPATHLEN+1];
482 int len;
483 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
486 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 mi = get_module_info(self, fullname);
489 if (mi == MI_ERROR)
490 return NULL;
491 if (mi == MI_NOT_FOUND) {
492 PyErr_Format(ZipImportError, "can't find module '%.200s'",
493 fullname);
494 return NULL;
495 }
496 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000497
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
499 if (len < 0)
500 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 if (mi == MI_PACKAGE) {
503 path[len] = SEP;
504 strcpy(path + len + 1, "__init__.py");
505 }
506 else
507 strcpy(path + len, ".py");
Just van Rossum52e14d62002-12-30 22:08:05 +0000508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 toc_entry = PyDict_GetItemString(self->files, path);
510 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000511 PyObject *res, *bytes;
512 bytes = get_data(self->archive, toc_entry);
513 if (bytes == NULL)
514 return NULL;
515 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
516 PyBytes_GET_SIZE(bytes));
517 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 return res;
519 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 /* we have the module, but no source */
522 Py_INCREF(Py_None);
523 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000524}
525
526PyDoc_STRVAR(doc_find_module,
527"find_module(fullname, path=None) -> self or None.\n\
528\n\
529Search for a module specified by 'fullname'. 'fullname' must be the\n\
530fully qualified (dotted) module name. It returns the zipimporter\n\
531instance itself if the module was found, or None if it wasn't.\n\
532The optional 'path' argument is ignored -- it's there for compatibility\n\
533with the importer protocol.");
534
535PyDoc_STRVAR(doc_load_module,
536"load_module(fullname) -> module.\n\
537\n\
538Load the module specified by 'fullname'. 'fullname' must be the\n\
539fully qualified (dotted) module name. It returns the imported\n\
540module, or raises ZipImportError if it wasn't found.");
541
542PyDoc_STRVAR(doc_get_data,
543"get_data(pathname) -> string with file data.\n\
544\n\
545Return the data associated with 'pathname'. Raise IOError if\n\
546the file wasn't found.");
547
548PyDoc_STRVAR(doc_is_package,
549"is_package(fullname) -> bool.\n\
550\n\
551Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000552Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000553
554PyDoc_STRVAR(doc_get_code,
555"get_code(fullname) -> code object.\n\
556\n\
557Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000558if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000559
560PyDoc_STRVAR(doc_get_source,
561"get_source(fullname) -> source string.\n\
562\n\
563Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000564if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000565contain the module, but has no source for it.");
566
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000567
568PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000569"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000570\n\
571Return the filename for the specified module.");
572
Just van Rossum52e14d62002-12-30 22:08:05 +0000573static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 {"find_module", zipimporter_find_module, METH_VARARGS,
575 doc_find_module},
576 {"load_module", zipimporter_load_module, METH_VARARGS,
577 doc_load_module},
578 {"get_data", zipimporter_get_data, METH_VARARGS,
579 doc_get_data},
580 {"get_code", zipimporter_get_code, METH_VARARGS,
581 doc_get_code},
582 {"get_source", zipimporter_get_source, METH_VARARGS,
583 doc_get_source},
584 {"get_filename", zipimporter_get_filename, METH_VARARGS,
585 doc_get_filename},
586 {"is_package", zipimporter_is_package, METH_VARARGS,
587 doc_is_package},
588 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000589};
590
591static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
593 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
594 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
595 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000596};
597
598PyDoc_STRVAR(zipimporter_doc,
599"zipimporter(archivepath) -> zipimporter object\n\
600\n\
601Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000602a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
603'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
604valid directory inside the archive.\n\
605\n\
606'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
607archive.\n\
608\n\
609The 'archive' attribute of zipimporter objects contains the name of the\n\
610zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000611
612#define DEFERRED_ADDRESS(ADDR) 0
613
614static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000615 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
616 "zipimport.zipimporter",
617 sizeof(ZipImporter),
618 0, /* tp_itemsize */
619 (destructor)zipimporter_dealloc, /* tp_dealloc */
620 0, /* tp_print */
621 0, /* tp_getattr */
622 0, /* tp_setattr */
623 0, /* tp_reserved */
624 (reprfunc)zipimporter_repr, /* tp_repr */
625 0, /* tp_as_number */
626 0, /* tp_as_sequence */
627 0, /* tp_as_mapping */
628 0, /* tp_hash */
629 0, /* tp_call */
630 0, /* tp_str */
631 PyObject_GenericGetAttr, /* tp_getattro */
632 0, /* tp_setattro */
633 0, /* tp_as_buffer */
634 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
635 Py_TPFLAGS_HAVE_GC, /* tp_flags */
636 zipimporter_doc, /* tp_doc */
637 zipimporter_traverse, /* tp_traverse */
638 0, /* tp_clear */
639 0, /* tp_richcompare */
640 0, /* tp_weaklistoffset */
641 0, /* tp_iter */
642 0, /* tp_iternext */
643 zipimporter_methods, /* tp_methods */
644 zipimporter_members, /* tp_members */
645 0, /* tp_getset */
646 0, /* tp_base */
647 0, /* tp_dict */
648 0, /* tp_descr_get */
649 0, /* tp_descr_set */
650 0, /* tp_dictoffset */
651 (initproc)zipimporter_init, /* tp_init */
652 PyType_GenericAlloc, /* tp_alloc */
653 PyType_GenericNew, /* tp_new */
654 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000655};
656
657
658/* implementation */
659
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit two's-complement value.  This partially reimplements
   marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no bit is ever
   shifted into the sign bit of a signed type, and the sign is applied
   arithmetically, so the result does not depend on the width of
   'long'. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* negative: value == -(~bits & 0x7FFFFFFF) - 1, which stays
           within range even for the most negative 32-bit value */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
676
677/*
678 read_directory(archive) -> files dict (new reference)
679
680 Given a path to a Zip archive, build a dict, mapping file names
681 (local to the archive, using SEP as a separator) to toc entries.
682
683 A toc_entry is a tuple:
684
Fred Drakef5b7fd22005-11-11 19:34:56 +0000685 (__file__, # value to use for __file__, available for all files
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 compress, # compression kind; 0 for uncompressed
687 data_size, # size of compressed data on disk
688 file_size, # size of decompressed data
689 file_offset, # offset of file header from start of archive
690 time, # mod time of file (in dos format)
691 date, # mod data of file (in dos format)
692 crc, # crc checksum of the data
Just van Rossum52e14d62002-12-30 22:08:05 +0000693 )
694
695 Directories can be recognized by the trailing SEP in the name,
696 data_size and file_offset are 0.
697*/
698static PyObject *
Victor Stinner2460a432010-08-16 17:54:28 +0000699read_directory(PyObject *archive_obj)
Just van Rossum52e14d62002-12-30 22:08:05 +0000700{
Victor Stinner2460a432010-08-16 17:54:28 +0000701 /* FIXME: work on Py_UNICODE* instead of char* */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000702 PyObject *files = NULL;
703 FILE *fp;
704 long compress, crc, data_size, file_size, file_offset, date, time;
705 long header_offset, name_size, header_size, header_position;
706 long i, l, count;
707 size_t length;
Victor Stinner2460a432010-08-16 17:54:28 +0000708 Py_UNICODE path[MAXPATHLEN + 5];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 char name[MAXPATHLEN + 5];
Victor Stinner2460a432010-08-16 17:54:28 +0000710 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 char *p, endof_central_dir[22];
712 long arc_offset; /* offset from beginning of file to start of zip-archive */
Victor Stinner2460a432010-08-16 17:54:28 +0000713 PyObject *pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000714
Victor Stinner2460a432010-08-16 17:54:28 +0000715 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 PyErr_SetString(PyExc_OverflowError,
717 "Zip path name is too long");
718 return NULL;
719 }
Victor Stinner2460a432010-08-16 17:54:28 +0000720 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
Just van Rossum52e14d62002-12-30 22:08:05 +0000721
Victor Stinner2460a432010-08-16 17:54:28 +0000722 fp = _Py_fopen(archive_obj, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (fp == NULL) {
724 PyErr_Format(ZipImportError, "can't open Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000725 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 return NULL;
727 }
728 fseek(fp, -22, SEEK_END);
729 header_position = ftell(fp);
730 if (fread(endof_central_dir, 1, 22, fp) != 22) {
731 fclose(fp);
732 PyErr_Format(ZipImportError, "can't read Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000733 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 return NULL;
735 }
736 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
737 /* Bad: End of Central Dir signature */
738 fclose(fp);
739 PyErr_Format(ZipImportError, "not a Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000740 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 return NULL;
742 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 header_size = get_long((unsigned char *)endof_central_dir + 12);
745 header_offset = get_long((unsigned char *)endof_central_dir + 16);
746 arc_offset = header_position - header_offset - header_size;
747 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000748
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 files = PyDict_New();
750 if (files == NULL)
751 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000752
Victor Stinner2460a432010-08-16 17:54:28 +0000753 length = Py_UNICODE_strlen(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 path[length] = SEP;
Just van Rossum52e14d62002-12-30 22:08:05 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 /* Start of Central Directory */
757 count = 0;
758 for (;;) {
759 PyObject *t;
760 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 fseek(fp, header_offset, 0); /* Start of file header */
763 l = PyMarshal_ReadLongFromFile(fp);
764 if (l != 0x02014B50)
765 break; /* Bad: Central Dir File Header */
766 fseek(fp, header_offset + 10, 0);
767 compress = PyMarshal_ReadShortFromFile(fp);
768 time = PyMarshal_ReadShortFromFile(fp);
769 date = PyMarshal_ReadShortFromFile(fp);
770 crc = PyMarshal_ReadLongFromFile(fp);
771 data_size = PyMarshal_ReadLongFromFile(fp);
772 file_size = PyMarshal_ReadLongFromFile(fp);
773 name_size = PyMarshal_ReadShortFromFile(fp);
774 header_size = 46 + name_size +
775 PyMarshal_ReadShortFromFile(fp) +
776 PyMarshal_ReadShortFromFile(fp);
777 fseek(fp, header_offset + 42, 0);
778 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
779 if (name_size > MAXPATHLEN)
780 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 p = name;
783 for (i = 0; i < name_size; i++) {
784 *p = (char)getc(fp);
785 if (*p == '/')
786 *p = SEP;
787 p++;
788 }
789 *p = 0; /* Add terminating null byte */
790 header_offset += header_size;
Just van Rossum52e14d62002-12-30 22:08:05 +0000791
Victor Stinner2460a432010-08-16 17:54:28 +0000792 nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size);
793 if (nameobj == NULL)
794 goto error;
795 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
Just van Rossum52e14d62002-12-30 22:08:05 +0000796
Victor Stinner2460a432010-08-16 17:54:28 +0000797 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
798 if (pathobj == NULL)
799 goto error;
800 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 file_size, file_offset, time, date, crc);
802 if (t == NULL)
803 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000804 err = PyDict_SetItem(files, nameobj, t);
805 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 Py_DECREF(t);
807 if (err != 0)
808 goto error;
809 count++;
810 }
811 fclose(fp);
812 if (Py_VerboseFlag)
Victor Stinner2460a432010-08-16 17:54:28 +0000813 PySys_FormatStderr("# zipimport: found %ld names in %U\n",
814 count, archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 return files;
Just van Rossum52e14d62002-12-30 22:08:05 +0000816error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 fclose(fp);
818 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +0000819 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000820 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000821}
822
823/* Return the zlib.decompress function object, or NULL if zlib couldn't
824 be imported. The function is cached when found, so subsequent calls
825 don't import zlib again. Returns a *borrowed* reference.
826 XXX This makes zlib.decompress immortal. */
827static PyObject *
828get_decompress_func(void)
829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 static PyObject *decompress = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 if (decompress == NULL) {
833 PyObject *zlib;
834 static int importing_zlib = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 if (importing_zlib != 0)
837 /* Someone has a zlib.py[co] in their Zip file;
838 let's avoid a stack overflow. */
839 return NULL;
840 importing_zlib = 1;
841 zlib = PyImport_ImportModuleNoBlock("zlib");
842 importing_zlib = 0;
843 if (zlib != NULL) {
844 decompress = PyObject_GetAttrString(zlib,
845 "decompress");
846 Py_DECREF(zlib);
847 }
848 else
849 PyErr_Clear();
850 if (Py_VerboseFlag)
851 PySys_WriteStderr("# zipimport: zlib %s\n",
852 zlib != NULL ? "available": "UNAVAILABLE");
853 }
854 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +0000855}
856
857/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
858 data as a new reference. */
859static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +0000860get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +0000861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 PyObject *raw_data, *data = NULL, *decompress;
863 char *buf;
864 FILE *fp;
865 int err;
866 Py_ssize_t bytes_read = 0;
867 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000868 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 long compress, data_size, file_size, file_offset, bytes_size;
870 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +0000871
Victor Stinner60fe8d92010-08-16 23:48:11 +0000872 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 &data_size, &file_size, &file_offset, &time,
874 &date, &crc)) {
875 return NULL;
876 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000877
Victor Stinner60fe8d92010-08-16 23:48:11 +0000878 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 if (!fp) {
880 PyErr_Format(PyExc_IOError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000881 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 return NULL;
883 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000884
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 /* Check to make sure the local file header is correct */
886 fseek(fp, file_offset, 0);
887 l = PyMarshal_ReadLongFromFile(fp);
888 if (l != 0x04034B50) {
889 /* Bad: Local File Header */
890 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000891 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 archive);
893 fclose(fp);
894 return NULL;
895 }
896 fseek(fp, file_offset + 26, 0);
897 l = 30 + PyMarshal_ReadShortFromFile(fp) +
898 PyMarshal_ReadShortFromFile(fp); /* local header size */
899 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 bytes_size = compress == 0 ? data_size : data_size + 1;
902 if (bytes_size == 0)
903 bytes_size++;
904 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 if (raw_data == NULL) {
907 fclose(fp);
908 return NULL;
909 }
910 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000911
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 err = fseek(fp, file_offset, 0);
913 if (err == 0)
914 bytes_read = fread(buf, 1, data_size, fp);
915 fclose(fp);
916 if (err || bytes_read != data_size) {
917 PyErr_SetString(PyExc_IOError,
918 "zipimport: can't read data");
919 Py_DECREF(raw_data);
920 return NULL;
921 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 if (compress != 0) {
924 buf[data_size] = 'Z'; /* saw this in zipfile.py */
925 data_size++;
926 }
927 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 if (compress == 0) { /* data is not compressed */
930 data = PyBytes_FromStringAndSize(buf, data_size);
931 Py_DECREF(raw_data);
932 return data;
933 }
934
935 /* Decompress with zlib */
936 decompress = get_decompress_func();
937 if (decompress == NULL) {
938 PyErr_SetString(ZipImportError,
939 "can't decompress data; "
940 "zlib not available");
941 goto error;
942 }
943 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000944error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 Py_DECREF(raw_data);
946 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +0000947}
948
/* Lenient date/time comparison function.  The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime only
   stores even seconds), so a difference of at most one second is
   accepted.

   Returns 1 when t1 and t2 differ by no more than one second, else 0.
   The comparison never computes t1 - t2 directly, so it cannot overflow
   and also behaves correctly where time_t is an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    if (t1 == t2)
        return 1;
    /* The larger operand cannot be the minimum value, so subtracting
       one from it is always safe. */
    if (t1 > t2)
        return t1 - 1 <= t2;
    return t2 - 1 <= t1;
}
961
962/* Given the contents of a .py[co] file in a buffer, unmarshal the data
963 and return the code object. Return None if it the magic word doesn't
964 match (we do this instead of raising an exception as we fall back
965 to .py if available and we don't want to mask other errors).
966 Returns a new reference. */
967static PyObject *
968unmarshal_code(char *pathname, PyObject *data, time_t mtime)
969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 PyObject *code;
971 char *buf = PyBytes_AsString(data);
972 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000974 if (size <= 9) {
975 PyErr_SetString(ZipImportError,
976 "bad pyc data");
977 return NULL;
978 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
981 if (Py_VerboseFlag)
982 PySys_WriteStderr("# %s has bad magic\n",
983 pathname);
984 Py_INCREF(Py_None);
985 return Py_None; /* signal caller to try alternative */
986 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
989 mtime)) {
990 if (Py_VerboseFlag)
991 PySys_WriteStderr("# %s has bad mtime\n",
992 pathname);
993 Py_INCREF(Py_None);
994 return Py_None; /* signal caller to try alternative */
995 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
998 if (code == NULL)
999 return NULL;
1000 if (!PyCode_Check(code)) {
1001 Py_DECREF(code);
1002 PyErr_Format(PyExc_TypeError,
1003 "compiled module %.200s is not a code object",
1004 pathname);
1005 return NULL;
1006 }
1007 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001008}
1009
1010/* Replace any occurances of "\r\n?" in the input string with "\n".
1011 This converts DOS and Mac line endings to Unix line endings.
1012 Also append a trailing "\n" to be compatible with
1013 PyParser_SimpleParseFile(). Returns a new reference. */
1014static PyObject *
1015normalize_line_endings(PyObject *source)
1016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 char *buf, *q, *p = PyBytes_AsString(source);
1018 PyObject *fixed_source;
1019 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 if (!p) {
1022 return PyBytes_FromStringAndSize("\n\0", 2);
1023 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 /* one char extra for trailing \n and one for terminating \0 */
1026 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1027 if (buf == NULL) {
1028 PyErr_SetString(PyExc_MemoryError,
1029 "zipimport: no memory to allocate "
1030 "source buffer");
1031 return NULL;
1032 }
1033 /* replace "\r\n?" by "\n" */
1034 for (q = buf; *p != '\0'; p++) {
1035 if (*p == '\r') {
1036 *q++ = '\n';
1037 if (*(p + 1) == '\n')
1038 p++;
1039 }
1040 else
1041 *q++ = *p;
1042 len++;
1043 }
1044 *q++ = '\n'; /* add trailing \n */
1045 *q = '\0';
1046 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1047 PyMem_Free(buf);
1048 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001049}
1050
1051/* Given a string buffer containing Python source code, compile it
1052 return and return a code object as a new reference. */
1053static PyObject *
1054compile_source(char *pathname, PyObject *source)
1055{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 PyObject *code, *fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 fixed_source = normalize_line_endings(source);
1059 if (fixed_source == NULL)
1060 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1063 Py_file_input);
1064 Py_DECREF(fixed_source);
1065 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001066}
1067
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: seconds/2 in bits 0-4, minutes in bits 5-10, hours in
            bits 11-15.
   dosdate: day in bits 0-4, month (1-12) in bits 5-8, years since 1980
            in bits 9-15.

   Only the low 16 bits of each argument are used.  The fields are
   extracted from unsigned copies so that a negative argument (e.g. a
   16-bit field that was read as a signed short) is never right-shifted
   as a signed value, which would be implementation-defined.
   Returns the result of mktime() on the decoded local time. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    unsigned int t = (unsigned int)dostime;
    unsigned int d = (unsigned int)dosdate;
    struct tm stm;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_sec   = (int)(t & 0x1f) * 2;
    stm.tm_min   = (int)((t >> 5) & 0x3f);
    stm.tm_hour  = (int)((t >> 11) & 0x1f);
    stm.tm_mday  = (int)(d & 0x1f);
    stm.tm_mon   = (int)((d >> 5) & 0x0f) - 1;
    stm.tm_year  = (int)((d >> 9) & 0x7f) + 80;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1087
1088/* Given a path to a .pyc or .pyo file in the archive, return the
1089 modifictaion time of the matching .py file, or 0 if no source
1090 is available. */
1091static time_t
1092get_mtime_of_source(ZipImporter *self, char *path)
1093{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 PyObject *toc_entry;
1095 time_t mtime = 0;
1096 Py_ssize_t lastchar = strlen(path) - 1;
1097 char savechar = path[lastchar];
1098 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1099 toc_entry = PyDict_GetItemString(self->files, path);
1100 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1101 PyTuple_Size(toc_entry) == 8) {
1102 /* fetch the time stamp of the .py file for comparison
1103 with an embedded pyc time stamp */
1104 int time, date;
1105 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1106 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1107 mtime = parse_dostime(time, date);
1108 }
1109 path[lastchar] = savechar;
1110 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001111}
1112
1113/* Return the code object for the module named by 'fullname' from the
1114 Zip archive as a new reference. */
1115static PyObject *
1116get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 PyObject *data, *code;
1120 char *modpath;
Just van Rossum52e14d62002-12-30 22:08:05 +00001121
Victor Stinner60fe8d92010-08-16 23:48:11 +00001122 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 if (data == NULL)
1124 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
Just van Rossum52e14d62002-12-30 22:08:05 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 if (isbytecode) {
1129 code = unmarshal_code(modpath, data, mtime);
1130 }
1131 else {
1132 code = compile_source(modpath, data);
1133 }
1134 Py_DECREF(data);
1135 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001136}
1137
1138/* Get the code object assoiciated with the module specified by
1139 'fullname'. */
1140static PyObject *
1141get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 int *p_ispackage, char **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001143{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 PyObject *toc_entry;
1145 char *subname, path[MAXPATHLEN + 1];
1146 int len;
1147 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1152 if (len < 0)
1153 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 for (zso = zip_searchorder; *zso->suffix; zso++) {
1156 PyObject *code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 strcpy(path + len, zso->suffix);
1159 if (Py_VerboseFlag > 1)
1160 PySys_WriteStderr("# trying %s%c%s\n",
1161 _PyUnicode_AsString(self->archive),
1162 (int)SEP, path);
1163 toc_entry = PyDict_GetItemString(self->files, path);
1164 if (toc_entry != NULL) {
1165 time_t mtime = 0;
1166 int ispackage = zso->type & IS_PACKAGE;
1167 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 if (isbytecode)
1170 mtime = get_mtime_of_source(self, path);
1171 if (p_ispackage != NULL)
1172 *p_ispackage = ispackage;
1173 code = get_code_from_data(self, ispackage,
1174 isbytecode, mtime,
1175 toc_entry);
1176 if (code == Py_None) {
1177 /* bad magic number or non-matching mtime
1178 in byte code, try next */
1179 Py_DECREF(code);
1180 continue;
1181 }
1182 if (code != NULL && p_modpath != NULL)
1183 *p_modpath = _PyUnicode_AsString(
1184 PyTuple_GetItem(toc_entry, 0));
1185 return code;
1186 }
1187 }
1188 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1189 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001190}
1191
1192
1193/* Module init */
1194
/* Docstring text for the module; zipimport_doc is referenced from the
   zipimportmodule definition below.  The three exported objects it
   lists are the ones added to the module in PyInit_zipimport(). */
1195PyDoc_STRVAR(zipimport_doc,
1196"zipimport provides support for importing Python modules from Zip archives.\n\
1197\n\
1198This module exports three objects:\n\
1199- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001200- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001201 subclass of ImportError, so it can be caught as ImportError, too.\n\
1202- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1203 info dicts, as used in zipimporter._files.\n\
1204\n\
1205It is usually not needed to use the zipimport module explicitly; it is\n\
1206used by the builtin import mechanism for sys.path items that are paths\n\
1207to Zip archives.");
1208
Martin v. Löwis1a214512008-06-11 05:26:20 +00001209static struct PyModuleDef zipimportmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 PyModuleDef_HEAD_INIT,
1211 "zipimport",
1212 zipimport_doc,
1213 -1,
1214 NULL,
1215 NULL,
1216 NULL,
1217 NULL,
1218 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001219};
1220
Just van Rossum52e14d62002-12-30 22:08:05 +00001221PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001222PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001223{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001225
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 if (PyType_Ready(&ZipImporter_Type) < 0)
1227 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001228
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 /* Correct directory separator */
1230 zip_searchorder[0].suffix[0] = SEP;
1231 zip_searchorder[1].suffix[0] = SEP;
1232 zip_searchorder[2].suffix[0] = SEP;
1233 if (Py_OptimizeFlag) {
1234 /* Reverse *.pyc and *.pyo */
1235 struct st_zip_searchorder tmp;
1236 tmp = zip_searchorder[0];
1237 zip_searchorder[0] = zip_searchorder[1];
1238 zip_searchorder[1] = tmp;
1239 tmp = zip_searchorder[3];
1240 zip_searchorder[3] = zip_searchorder[4];
1241 zip_searchorder[4] = tmp;
1242 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 mod = PyModule_Create(&zipimportmodule);
1245 if (mod == NULL)
1246 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1249 PyExc_ImportError, NULL);
1250 if (ZipImportError == NULL)
1251 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 Py_INCREF(ZipImportError);
1254 if (PyModule_AddObject(mod, "ZipImportError",
1255 ZipImportError) < 0)
1256 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 Py_INCREF(&ZipImporter_Type);
1259 if (PyModule_AddObject(mod, "zipimporter",
1260 (PyObject *)&ZipImporter_Type) < 0)
1261 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 zip_directory_cache = PyDict_New();
1264 if (zip_directory_cache == NULL)
1265 return NULL;
1266 Py_INCREF(zip_directory_cache);
1267 if (PyModule_AddObject(mod, "_zip_directory_cache",
1268 zip_directory_cache) < 0)
1269 return NULL;
1270 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001271}