blob: d1c939f984f5d185658196cd81da24704c0993d2 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags describing one entry of the module search order. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* A candidate file suffix paired with the IS_* flags that describe it. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix is "/__init__.pyc" plus the NUL */
    int type;           /* bitwise OR of the IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
Just van Rossum52e14d62002-12-30 22:08:05 +000030};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000037 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
Just van Rossum52e14d62002-12-30 22:08:05 +000041};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
Victor Stinner2460a432010-08-16 17:54:28 +000047static PyObject *read_directory(PyObject *archive);
Victor Stinner60fe8d92010-08-16 23:48:11 +000048static PyObject *get_data(PyObject *archive, PyObject *toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +000049static PyObject *get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000050 int *p_ispackage, char **p_modpath);
Just van Rossum52e14d62002-12-30 22:08:05 +000051
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
Victor Stinner2460a432010-08-16 17:54:28 +000063 PyObject *pathobj, *files;
Victor Stinner2b8dab72010-08-14 14:54:10 +000064 Py_UNICODE *path, *p, *prefix, buf[MAXPATHLEN+2];
65 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
Georg Brandl02c42872005-08-26 06:42:30 +000069
Victor Stinner2b8dab72010-08-14 14:54:10 +000070 if (!PyArg_ParseTuple(args, "O&:zipimporter",
71 PyUnicode_FSDecoder, &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +000073
Victor Stinner2b8dab72010-08-14 14:54:10 +000074 /* copy path to buf */
75 len = PyUnicode_GET_SIZE(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 if (len == 0) {
77 PyErr_SetString(ZipImportError, "archive path is empty");
Victor Stinner2b8dab72010-08-14 14:54:10 +000078 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 }
80 if (len >= MAXPATHLEN) {
81 PyErr_SetString(ZipImportError,
82 "archive path too long");
Victor Stinner2b8dab72010-08-14 14:54:10 +000083 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 }
Victor Stinner2b8dab72010-08-14 14:54:10 +000085 Py_UNICODE_strcpy(buf, PyUnicode_AS_UNICODE(pathobj));
Just van Rossum52e14d62002-12-30 22:08:05 +000086
87#ifdef ALTSEP
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000088 for (p = buf; *p; p++) {
89 if (*p == ALTSEP)
90 *p = SEP;
91 }
Just van Rossum52e14d62002-12-30 22:08:05 +000092#endif
93
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 path = NULL;
95 prefix = NULL;
96 for (;;) {
97 struct stat statbuf;
98 int rv;
Just van Rossum52e14d62002-12-30 22:08:05 +000099
Victor Stinner2b8dab72010-08-14 14:54:10 +0000100 if (pathobj == NULL) {
101 pathobj = PyUnicode_FromUnicode(buf, len);
102 if (pathobj == NULL)
103 goto error;
104 }
105 rv = _Py_stat(pathobj, &statbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 if (rv == 0) {
107 /* it exists */
108 if (S_ISREG(statbuf.st_mode))
109 /* it's a file */
110 path = buf;
111 break;
112 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000113 else if (PyErr_Occurred())
114 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 /* back up one path element */
Victor Stinner2b8dab72010-08-14 14:54:10 +0000116 p = Py_UNICODE_strrchr(buf, SEP);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 if (prefix != NULL)
118 *prefix = SEP;
119 if (p == NULL)
120 break;
121 *p = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000122 len = p - buf;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000123 prefix = p;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000124 Py_CLEAR(pathobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000126 if (path == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000127 PyErr_SetString(ZipImportError, "not a Zip file");
Victor Stinner2b8dab72010-08-14 14:54:10 +0000128 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000130
Victor Stinner2b8dab72010-08-14 14:54:10 +0000131 files = PyDict_GetItem(zip_directory_cache, pathobj);
132 if (files == NULL) {
Victor Stinner2460a432010-08-16 17:54:28 +0000133 files = read_directory(pathobj);
Victor Stinner2b8dab72010-08-14 14:54:10 +0000134 if (files == NULL)
135 goto error;
136 if (PyDict_SetItem(zip_directory_cache, pathobj, files) != 0)
137 goto error;
138 }
139 else
140 Py_INCREF(files);
141 self->files = files;
142
143 self->archive = pathobj;
144 pathobj = NULL;
145
146 if (prefix != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 prefix++;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000148 len = Py_UNICODE_strlen(prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 if (prefix[len-1] != SEP) {
150 /* add trailing SEP */
151 prefix[len] = SEP;
152 prefix[len + 1] = '\0';
Victor Stinner2b8dab72010-08-14 14:54:10 +0000153 len++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 }
155 }
Victor Stinner2b8dab72010-08-14 14:54:10 +0000156 else
157 len = 0;
158 self->prefix = PyUnicode_FromUnicode(prefix, len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 if (self->prefix == NULL)
Victor Stinner2b8dab72010-08-14 14:54:10 +0000160 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000162 return 0;
Victor Stinner2b8dab72010-08-14 14:54:10 +0000163
164error:
165 Py_XDECREF(pathobj);
166 return -1;
Just van Rossum52e14d62002-12-30 22:08:05 +0000167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 ZipImporter *self = (ZipImporter *)obj;
174 Py_VISIT(self->files);
175 return 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000176}
177
178static void
179zipimporter_dealloc(ZipImporter *self)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 PyObject_GC_UnTrack(self);
182 Py_XDECREF(self->archive);
183 Py_XDECREF(self->prefix);
184 Py_XDECREF(self->files);
185 Py_TYPE(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000186}
187
188static PyObject *
189zipimporter_repr(ZipImporter *self)
190{
Victor Stinner028dd972010-08-17 00:04:48 +0000191 if (self->archive == NULL)
192 return PyUnicode_FromString("<zipimporter object \"???\">");
193 else if (self->prefix != NULL && PyUnicode_GET_SIZE(self->prefix) != 0)
194 return PyUnicode_FromFormat("<zipimporter object \"%.300U%c%.150U\">",
195 self->archive, SEP, self->prefix);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 else
Victor Stinner028dd972010-08-17 00:04:48 +0000197 return PyUnicode_FromFormat("<zipimporter object \"%.300U\">",
198 self->archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000199}
200
/* Return the last dotted component of fullname, i.e.
   fullname.split(".")[-1].  The result points into fullname. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
212
213/* Given a (sub)modulename, write the potential file path in the
214 archive (without extension) to the path buffer. Return the
215 length of the resulting string. */
216static int
217make_filename(char *prefix, char *name, char *path)
218{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 size_t len;
220 char *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000221
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 len = strlen(prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
225 if (len + strlen(name) + 13 >= MAXPATHLEN) {
226 PyErr_SetString(ZipImportError, "path too long");
227 return -1;
228 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 strcpy(path, prefix);
231 strcpy(path + len, name);
232 for (p = path + len; *p; p++) {
233 if (*p == '.')
234 *p = SEP;
235 }
236 len += strlen(name);
237 assert(len < INT_MAX);
238 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000239}
240
/* Outcome of looking a module up in an archive (see get_module_info). */
enum zi_module_info {
    MI_ERROR,       /* the lookup itself failed */
    MI_NOT_FOUND,   /* no candidate file is present in the archive */
    MI_MODULE,      /* found, the matching entry is not a package */
    MI_PACKAGE      /* found, the matching entry is a package __init__ */
};
247
248/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000249static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000250get_module_info(ZipImporter *self, char *fullname)
251{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 char *subname, path[MAXPATHLEN + 1];
253 int len;
254 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +0000255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
259 if (len < 0)
260 return MI_ERROR;
Just van Rossum52e14d62002-12-30 22:08:05 +0000261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 for (zso = zip_searchorder; *zso->suffix; zso++) {
263 strcpy(path + len, zso->suffix);
264 if (PyDict_GetItemString(self->files, path) != NULL) {
265 if (zso->type & IS_PACKAGE)
266 return MI_PACKAGE;
267 else
268 return MI_MODULE;
269 }
270 }
271 return MI_NOT_FOUND;
Just van Rossum52e14d62002-12-30 22:08:05 +0000272}
273
274/* Check whether we can satisfy the import of the module named by
275 'fullname'. Return self if we can, None if we can't. */
276static PyObject *
277zipimporter_find_module(PyObject *obj, PyObject *args)
278{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 ZipImporter *self = (ZipImporter *)obj;
280 PyObject *path = NULL;
281 char *fullname;
282 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
285 &fullname, &path))
286 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000287
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000288 mi = get_module_info(self, fullname);
289 if (mi == MI_ERROR)
290 return NULL;
291 if (mi == MI_NOT_FOUND) {
292 Py_INCREF(Py_None);
293 return Py_None;
294 }
295 Py_INCREF(self);
296 return (PyObject *)self;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297}
298
299/* Load and return the module named by 'fullname'. */
300static PyObject *
301zipimporter_load_module(PyObject *obj, PyObject *args)
302{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 ZipImporter *self = (ZipImporter *)obj;
304 PyObject *code, *mod, *dict;
305 char *fullname, *modpath;
306 int ispackage;
Just van Rossum52e14d62002-12-30 22:08:05 +0000307
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
309 &fullname))
310 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 code = get_module_code(self, fullname, &ispackage, &modpath);
313 if (code == NULL)
314 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 mod = PyImport_AddModule(fullname);
317 if (mod == NULL) {
318 Py_DECREF(code);
319 return NULL;
320 }
321 dict = PyModule_GetDict(mod);
Just van Rossum52e14d62002-12-30 22:08:05 +0000322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000323 /* mod.__loader__ = self */
324 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
325 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 if (ispackage) {
328 /* add __path__ to the module *before* the code gets
329 executed */
330 PyObject *pkgpath, *fullpath;
331 char *subname = get_subname(fullname);
332 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 fullpath = PyUnicode_FromFormat("%U%c%U%s",
335 self->archive, SEP,
336 self->prefix, subname);
337 if (fullpath == NULL)
338 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 pkgpath = Py_BuildValue("[O]", fullpath);
341 Py_DECREF(fullpath);
342 if (pkgpath == NULL)
343 goto error;
344 err = PyDict_SetItemString(dict, "__path__", pkgpath);
345 Py_DECREF(pkgpath);
346 if (err != 0)
347 goto error;
348 }
349 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
350 Py_DECREF(code);
351 if (Py_VerboseFlag)
352 PySys_WriteStderr("import %s # loaded from Zip %s\n",
353 fullname, modpath);
354 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +0000355error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000356 Py_DECREF(code);
357 Py_DECREF(mod);
358 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000359}
360
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000361/* Return a string matching __file__ for the named module */
362static PyObject *
363zipimporter_get_filename(PyObject *obj, PyObject *args)
364{
365 ZipImporter *self = (ZipImporter *)obj;
366 PyObject *code;
367 char *fullname, *modpath;
368 int ispackage;
369
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000370 if (!PyArg_ParseTuple(args, "s:zipimporter.get_filename",
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000371 &fullname))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000372 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000373
374 /* Deciding the filename requires working out where the code
375 would come from if the module was actually loaded */
376 code = get_module_code(self, fullname, &ispackage, &modpath);
377 if (code == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 return NULL;
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000379 Py_DECREF(code); /* Only need the path info */
380
381 return PyUnicode_FromString(modpath);
382}
383
Just van Rossum52e14d62002-12-30 22:08:05 +0000384/* Return a bool signifying whether the module is a package or not. */
385static PyObject *
386zipimporter_is_package(PyObject *obj, PyObject *args)
387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000388 ZipImporter *self = (ZipImporter *)obj;
389 char *fullname;
390 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
393 &fullname))
394 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 mi = get_module_info(self, fullname);
397 if (mi == MI_ERROR)
398 return NULL;
399 if (mi == MI_NOT_FOUND) {
400 PyErr_Format(ZipImportError, "can't find module '%.200s'",
401 fullname);
402 return NULL;
403 }
404 return PyBool_FromLong(mi == MI_PACKAGE);
Just van Rossum52e14d62002-12-30 22:08:05 +0000405}
406
407static PyObject *
408zipimporter_get_data(PyObject *obj, PyObject *args)
409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 ZipImporter *self = (ZipImporter *)obj;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000411 PyObject *pathobj, *key;
412 const Py_UNICODE *path;
Just van Rossum52e14d62002-12-30 22:08:05 +0000413#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000414 Py_UNICODE *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000415#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000416 Py_UNICODE *archive;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 PyObject *toc_entry;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000418 Py_ssize_t path_len, len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000419
Victor Stinner60fe8d92010-08-16 23:48:11 +0000420 if (!PyArg_ParseTuple(args, "U:zipimporter.get_data", &pathobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000422
Victor Stinner60fe8d92010-08-16 23:48:11 +0000423 path_len = PyUnicode_GET_SIZE(pathobj);
424 path = PyUnicode_AS_UNICODE(pathobj);
Just van Rossum52e14d62002-12-30 22:08:05 +0000425#ifdef ALTSEP
Victor Stinner60fe8d92010-08-16 23:48:11 +0000426 if (path_len >= MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 PyErr_SetString(ZipImportError, "path too long");
428 return NULL;
429 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000430 Py_UNICODE_strcpy(buf, path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 for (p = buf; *p; p++) {
432 if (*p == ALTSEP)
433 *p = SEP;
434 }
435 path = buf;
Just van Rossum52e14d62002-12-30 22:08:05 +0000436#endif
Victor Stinner60fe8d92010-08-16 23:48:11 +0000437 archive = PyUnicode_AS_UNICODE(self->archive);
438 len = PyUnicode_GET_SIZE(self->archive);
439 if ((size_t)len < Py_UNICODE_strlen(path) &&
440 Py_UNICODE_strncmp(path, archive, len) == 0 &&
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 path[len] == SEP) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000442 path += len + 1;
443 path_len -= len + 1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000445
Victor Stinner60fe8d92010-08-16 23:48:11 +0000446 key = PyUnicode_FromUnicode(path, path_len);
447 if (key == NULL)
448 return NULL;
449 toc_entry = PyDict_GetItem(self->files, key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 if (toc_entry == NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000451 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, key);
452 Py_DECREF(key);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 return NULL;
454 }
Victor Stinner60fe8d92010-08-16 23:48:11 +0000455 Py_DECREF(key);
456 return get_data(self->archive, toc_entry);
Just van Rossum52e14d62002-12-30 22:08:05 +0000457}
458
459static PyObject *
460zipimporter_get_code(PyObject *obj, PyObject *args)
461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000462 ZipImporter *self = (ZipImporter *)obj;
463 char *fullname;
Just van Rossum52e14d62002-12-30 22:08:05 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
466 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 return get_module_code(self, fullname, NULL, NULL);
Just van Rossum52e14d62002-12-30 22:08:05 +0000469}
470
471static PyObject *
472zipimporter_get_source(PyObject *obj, PyObject *args)
473{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 ZipImporter *self = (ZipImporter *)obj;
475 PyObject *toc_entry;
476 char *fullname, *subname, path[MAXPATHLEN+1];
477 int len;
478 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
481 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 mi = get_module_info(self, fullname);
484 if (mi == MI_ERROR)
485 return NULL;
486 if (mi == MI_NOT_FOUND) {
487 PyErr_Format(ZipImportError, "can't find module '%.200s'",
488 fullname);
489 return NULL;
490 }
491 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +0000492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
494 if (len < 0)
495 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 if (mi == MI_PACKAGE) {
498 path[len] = SEP;
499 strcpy(path + len + 1, "__init__.py");
500 }
501 else
502 strcpy(path + len, ".py");
Just van Rossum52e14d62002-12-30 22:08:05 +0000503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 toc_entry = PyDict_GetItemString(self->files, path);
505 if (toc_entry != NULL) {
Victor Stinner60fe8d92010-08-16 23:48:11 +0000506 PyObject *res, *bytes;
507 bytes = get_data(self->archive, toc_entry);
508 if (bytes == NULL)
509 return NULL;
510 res = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(bytes),
511 PyBytes_GET_SIZE(bytes));
512 Py_DECREF(bytes);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 return res;
514 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 /* we have the module, but no source */
517 Py_INCREF(Py_None);
518 return Py_None;
Just van Rossum52e14d62002-12-30 22:08:05 +0000519}
520
521PyDoc_STRVAR(doc_find_module,
522"find_module(fullname, path=None) -> self or None.\n\
523\n\
524Search for a module specified by 'fullname'. 'fullname' must be the\n\
525fully qualified (dotted) module name. It returns the zipimporter\n\
526instance itself if the module was found, or None if it wasn't.\n\
527The optional 'path' argument is ignored -- it's there for compatibility\n\
528with the importer protocol.");
529
530PyDoc_STRVAR(doc_load_module,
531"load_module(fullname) -> module.\n\
532\n\
533Load the module specified by 'fullname'. 'fullname' must be the\n\
534fully qualified (dotted) module name. It returns the imported\n\
535module, or raises ZipImportError if it wasn't found.");
536
537PyDoc_STRVAR(doc_get_data,
538"get_data(pathname) -> string with file data.\n\
539\n\
540Return the data associated with 'pathname'. Raise IOError if\n\
541the file wasn't found.");
542
543PyDoc_STRVAR(doc_is_package,
544"is_package(fullname) -> bool.\n\
545\n\
546Return True if the module specified by fullname is a package.\n\
Brian Curtin32839732010-07-21 01:44:19 +0000547Raise ZipImportError if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000548
549PyDoc_STRVAR(doc_get_code,
550"get_code(fullname) -> code object.\n\
551\n\
552Return the code object for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000553if the module couldn't be found.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000554
555PyDoc_STRVAR(doc_get_source,
556"get_source(fullname) -> source string.\n\
557\n\
558Return the source code for the specified module. Raise ZipImportError\n\
Brian Curtin32839732010-07-21 01:44:19 +0000559if the module couldn't be found, return None if the archive does\n\
Just van Rossum52e14d62002-12-30 22:08:05 +0000560contain the module, but has no source for it.");
561
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000562
563PyDoc_STRVAR(doc_get_filename,
Nick Coghlan9a1d6e32009-02-08 03:37:27 +0000564"get_filename(fullname) -> filename string.\n\
Nick Coghlanf088e5e2008-12-14 11:50:48 +0000565\n\
566Return the filename for the specified module.");
567
Just van Rossum52e14d62002-12-30 22:08:05 +0000568static PyMethodDef zipimporter_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 {"find_module", zipimporter_find_module, METH_VARARGS,
570 doc_find_module},
571 {"load_module", zipimporter_load_module, METH_VARARGS,
572 doc_load_module},
573 {"get_data", zipimporter_get_data, METH_VARARGS,
574 doc_get_data},
575 {"get_code", zipimporter_get_code, METH_VARARGS,
576 doc_get_code},
577 {"get_source", zipimporter_get_source, METH_VARARGS,
578 doc_get_source},
579 {"get_filename", zipimporter_get_filename, METH_VARARGS,
580 doc_get_filename},
581 {"is_package", zipimporter_is_package, METH_VARARGS,
582 doc_is_package},
583 {NULL, NULL} /* sentinel */
Just van Rossum52e14d62002-12-30 22:08:05 +0000584};
585
586static PyMemberDef zipimporter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
588 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
589 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
590 {NULL}
Just van Rossum52e14d62002-12-30 22:08:05 +0000591};
592
593PyDoc_STRVAR(zipimporter_doc,
594"zipimporter(archivepath) -> zipimporter object\n\
595\n\
596Create a new zipimporter instance. 'archivepath' must be a path to\n\
Alexandre Vassalotti8ae3e052008-05-16 00:41:41 +0000597a zipfile, or to a specific path inside a zipfile. For example, it can be\n\
598'/tmp/myimport.zip', or '/tmp/myimport.zip/mydirectory', if mydirectory is a\n\
599valid directory inside the archive.\n\
600\n\
601'ZipImportError is raised if 'archivepath' doesn't point to a valid Zip\n\
602archive.\n\
603\n\
604The 'archive' attribute of zipimporter objects contains the name of the\n\
605zipfile targeted.");
Just van Rossum52e14d62002-12-30 22:08:05 +0000606
607#define DEFERRED_ADDRESS(ADDR) 0
608
609static PyTypeObject ZipImporter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
611 "zipimport.zipimporter",
612 sizeof(ZipImporter),
613 0, /* tp_itemsize */
614 (destructor)zipimporter_dealloc, /* tp_dealloc */
615 0, /* tp_print */
616 0, /* tp_getattr */
617 0, /* tp_setattr */
618 0, /* tp_reserved */
619 (reprfunc)zipimporter_repr, /* tp_repr */
620 0, /* tp_as_number */
621 0, /* tp_as_sequence */
622 0, /* tp_as_mapping */
623 0, /* tp_hash */
624 0, /* tp_call */
625 0, /* tp_str */
626 PyObject_GenericGetAttr, /* tp_getattro */
627 0, /* tp_setattro */
628 0, /* tp_as_buffer */
629 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
630 Py_TPFLAGS_HAVE_GC, /* tp_flags */
631 zipimporter_doc, /* tp_doc */
632 zipimporter_traverse, /* tp_traverse */
633 0, /* tp_clear */
634 0, /* tp_richcompare */
635 0, /* tp_weaklistoffset */
636 0, /* tp_iter */
637 0, /* tp_iternext */
638 zipimporter_methods, /* tp_methods */
639 zipimporter_members, /* tp_members */
640 0, /* tp_getset */
641 0, /* tp_base */
642 0, /* tp_dict */
643 0, /* tp_descr_get */
644 0, /* tp_descr_set */
645 0, /* tp_dictoffset */
646 (initproc)zipimporter_init, /* tp_init */
647 PyType_GenericAlloc, /* tp_alloc */
648 PyType_GenericNew, /* tp_new */
649 PyObject_GC_Del, /* tp_free */
Just van Rossum52e14d62002-12-30 22:08:05 +0000650};
651
652
653/* implementation */
654
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value.  This partially reimplements marshal.c:r_long().

   buf must point to at least 4 readable bytes. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    /* Assemble the value in an unsigned type: shifting a byte >= 0x80
       left by 24 in a 32-bit signed long would overflow into the sign
       bit, which is undefined behaviour. */
    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    /* Sign-extend from bit 31 with arithmetic that is valid for any
       width of long (no dependency on SIZEOF_LONG). */
    if (bits & 0x80000000UL)
        return (long)(bits & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1;
    return (long)bits;
}
671
672/*
673 read_directory(archive) -> files dict (new reference)
674
675 Given a path to a Zip archive, build a dict, mapping file names
676 (local to the archive, using SEP as a separator) to toc entries.
677
678 A toc_entry is a tuple:
679
Fred Drakef5b7fd22005-11-11 19:34:56 +0000680 (__file__, # value to use for __file__, available for all files
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 compress, # compression kind; 0 for uncompressed
682 data_size, # size of compressed data on disk
683 file_size, # size of decompressed data
684 file_offset, # offset of file header from start of archive
685 time, # mod time of file (in dos format)
686 date, # mod data of file (in dos format)
687 crc, # crc checksum of the data
Just van Rossum52e14d62002-12-30 22:08:05 +0000688 )
689
690 Directories can be recognized by the trailing SEP in the name,
691 data_size and file_offset are 0.
692*/
693static PyObject *
Victor Stinner2460a432010-08-16 17:54:28 +0000694read_directory(PyObject *archive_obj)
Just van Rossum52e14d62002-12-30 22:08:05 +0000695{
Victor Stinner2460a432010-08-16 17:54:28 +0000696 /* FIXME: work on Py_UNICODE* instead of char* */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 PyObject *files = NULL;
698 FILE *fp;
699 long compress, crc, data_size, file_size, file_offset, date, time;
700 long header_offset, name_size, header_size, header_position;
701 long i, l, count;
702 size_t length;
Victor Stinner2460a432010-08-16 17:54:28 +0000703 Py_UNICODE path[MAXPATHLEN + 5];
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 char name[MAXPATHLEN + 5];
Victor Stinner2460a432010-08-16 17:54:28 +0000705 PyObject *nameobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 char *p, endof_central_dir[22];
707 long arc_offset; /* offset from beginning of file to start of zip-archive */
Victor Stinner2460a432010-08-16 17:54:28 +0000708 PyObject *pathobj;
Just van Rossum52e14d62002-12-30 22:08:05 +0000709
Victor Stinner2460a432010-08-16 17:54:28 +0000710 if (PyUnicode_GET_SIZE(archive_obj) > MAXPATHLEN) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 PyErr_SetString(PyExc_OverflowError,
712 "Zip path name is too long");
713 return NULL;
714 }
Victor Stinner2460a432010-08-16 17:54:28 +0000715 Py_UNICODE_strcpy(path, PyUnicode_AS_UNICODE(archive_obj));
Just van Rossum52e14d62002-12-30 22:08:05 +0000716
Victor Stinner2460a432010-08-16 17:54:28 +0000717 fp = _Py_fopen(archive_obj, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 if (fp == NULL) {
719 PyErr_Format(ZipImportError, "can't open Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000720 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 return NULL;
722 }
723 fseek(fp, -22, SEEK_END);
724 header_position = ftell(fp);
725 if (fread(endof_central_dir, 1, 22, fp) != 22) {
726 fclose(fp);
727 PyErr_Format(ZipImportError, "can't read Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000728 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 return NULL;
730 }
731 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
732 /* Bad: End of Central Dir signature */
733 fclose(fp);
734 PyErr_Format(ZipImportError, "not a Zip file: "
Victor Stinner2460a432010-08-16 17:54:28 +0000735 "'%.200U'", archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 return NULL;
737 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000738
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 header_size = get_long((unsigned char *)endof_central_dir + 12);
740 header_offset = get_long((unsigned char *)endof_central_dir + 16);
741 arc_offset = header_position - header_offset - header_size;
742 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 files = PyDict_New();
745 if (files == NULL)
746 goto error;
Just van Rossum52e14d62002-12-30 22:08:05 +0000747
Victor Stinner2460a432010-08-16 17:54:28 +0000748 length = Py_UNICODE_strlen(path);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 path[length] = SEP;
Just van Rossum52e14d62002-12-30 22:08:05 +0000750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 /* Start of Central Directory */
752 count = 0;
753 for (;;) {
754 PyObject *t;
755 int err;
Just van Rossum52e14d62002-12-30 22:08:05 +0000756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 fseek(fp, header_offset, 0); /* Start of file header */
758 l = PyMarshal_ReadLongFromFile(fp);
759 if (l != 0x02014B50)
760 break; /* Bad: Central Dir File Header */
761 fseek(fp, header_offset + 10, 0);
762 compress = PyMarshal_ReadShortFromFile(fp);
763 time = PyMarshal_ReadShortFromFile(fp);
764 date = PyMarshal_ReadShortFromFile(fp);
765 crc = PyMarshal_ReadLongFromFile(fp);
766 data_size = PyMarshal_ReadLongFromFile(fp);
767 file_size = PyMarshal_ReadLongFromFile(fp);
768 name_size = PyMarshal_ReadShortFromFile(fp);
769 header_size = 46 + name_size +
770 PyMarshal_ReadShortFromFile(fp) +
771 PyMarshal_ReadShortFromFile(fp);
772 fseek(fp, header_offset + 42, 0);
773 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
774 if (name_size > MAXPATHLEN)
775 name_size = MAXPATHLEN;
Just van Rossum52e14d62002-12-30 22:08:05 +0000776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 p = name;
778 for (i = 0; i < name_size; i++) {
779 *p = (char)getc(fp);
780 if (*p == '/')
781 *p = SEP;
782 p++;
783 }
784 *p = 0; /* Add terminating null byte */
785 header_offset += header_size;
Just van Rossum52e14d62002-12-30 22:08:05 +0000786
Victor Stinner2460a432010-08-16 17:54:28 +0000787 nameobj = PyUnicode_DecodeFSDefaultAndSize(name, name_size);
788 if (nameobj == NULL)
789 goto error;
790 Py_UNICODE_strncpy(path + length + 1, PyUnicode_AS_UNICODE(nameobj), MAXPATHLEN - length - 1);
Just van Rossum52e14d62002-12-30 22:08:05 +0000791
Victor Stinner2460a432010-08-16 17:54:28 +0000792 pathobj = PyUnicode_FromUnicode(path, Py_UNICODE_strlen(path));
793 if (pathobj == NULL)
794 goto error;
795 t = Py_BuildValue("Niiiiiii", pathobj, compress, data_size,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 file_size, file_offset, time, date, crc);
797 if (t == NULL)
798 goto error;
Victor Stinner2460a432010-08-16 17:54:28 +0000799 err = PyDict_SetItem(files, nameobj, t);
800 Py_CLEAR(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 Py_DECREF(t);
802 if (err != 0)
803 goto error;
804 count++;
805 }
806 fclose(fp);
807 if (Py_VerboseFlag)
Victor Stinner2460a432010-08-16 17:54:28 +0000808 PySys_FormatStderr("# zipimport: found %ld names in %U\n",
809 count, archive_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 return files;
Just van Rossum52e14d62002-12-30 22:08:05 +0000811error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 fclose(fp);
813 Py_XDECREF(files);
Victor Stinner2460a432010-08-16 17:54:28 +0000814 Py_XDECREF(nameobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000816}
817
818/* Return the zlib.decompress function object, or NULL if zlib couldn't
819 be imported. The function is cached when found, so subsequent calls
820 don't import zlib again. Returns a *borrowed* reference.
821 XXX This makes zlib.decompress immortal. */
822static PyObject *
823get_decompress_func(void)
824{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 static PyObject *decompress = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +0000826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 if (decompress == NULL) {
828 PyObject *zlib;
829 static int importing_zlib = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000830
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000831 if (importing_zlib != 0)
832 /* Someone has a zlib.py[co] in their Zip file;
833 let's avoid a stack overflow. */
834 return NULL;
835 importing_zlib = 1;
836 zlib = PyImport_ImportModuleNoBlock("zlib");
837 importing_zlib = 0;
838 if (zlib != NULL) {
839 decompress = PyObject_GetAttrString(zlib,
840 "decompress");
841 Py_DECREF(zlib);
842 }
843 else
844 PyErr_Clear();
845 if (Py_VerboseFlag)
846 PySys_WriteStderr("# zipimport: zlib %s\n",
847 zlib != NULL ? "available": "UNAVAILABLE");
848 }
849 return decompress;
Just van Rossum52e14d62002-12-30 22:08:05 +0000850}
851
852/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
853 data as a new reference. */
854static PyObject *
Victor Stinner60fe8d92010-08-16 23:48:11 +0000855get_data(PyObject *archive, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +0000856{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 PyObject *raw_data, *data = NULL, *decompress;
858 char *buf;
859 FILE *fp;
860 int err;
861 Py_ssize_t bytes_read = 0;
862 long l;
Victor Stinner60fe8d92010-08-16 23:48:11 +0000863 PyObject *datapath;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 long compress, data_size, file_size, file_offset, bytes_size;
865 long time, date, crc;
Just van Rossum52e14d62002-12-30 22:08:05 +0000866
Victor Stinner60fe8d92010-08-16 23:48:11 +0000867 if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 &data_size, &file_size, &file_offset, &time,
869 &date, &crc)) {
870 return NULL;
871 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000872
Victor Stinner60fe8d92010-08-16 23:48:11 +0000873 fp = _Py_fopen(archive, "rb");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 if (!fp) {
875 PyErr_Format(PyExc_IOError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000876 "zipimport: can not open file %U", archive);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 return NULL;
878 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000879
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 /* Check to make sure the local file header is correct */
881 fseek(fp, file_offset, 0);
882 l = PyMarshal_ReadLongFromFile(fp);
883 if (l != 0x04034B50) {
884 /* Bad: Local File Header */
885 PyErr_Format(ZipImportError,
Victor Stinner60fe8d92010-08-16 23:48:11 +0000886 "bad local file header in %U",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 archive);
888 fclose(fp);
889 return NULL;
890 }
891 fseek(fp, file_offset + 26, 0);
892 l = 30 + PyMarshal_ReadShortFromFile(fp) +
893 PyMarshal_ReadShortFromFile(fp); /* local header size */
894 file_offset += l; /* Start of file data */
Just van Rossum52e14d62002-12-30 22:08:05 +0000895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 bytes_size = compress == 0 ? data_size : data_size + 1;
897 if (bytes_size == 0)
898 bytes_size++;
899 raw_data = PyBytes_FromStringAndSize((char *)NULL, bytes_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 if (raw_data == NULL) {
902 fclose(fp);
903 return NULL;
904 }
905 buf = PyBytes_AsString(raw_data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000906
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 err = fseek(fp, file_offset, 0);
908 if (err == 0)
909 bytes_read = fread(buf, 1, data_size, fp);
910 fclose(fp);
911 if (err || bytes_read != data_size) {
912 PyErr_SetString(PyExc_IOError,
913 "zipimport: can't read data");
914 Py_DECREF(raw_data);
915 return NULL;
916 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 if (compress != 0) {
919 buf[data_size] = 'Z'; /* saw this in zipfile.py */
920 data_size++;
921 }
922 buf[data_size] = '\0';
Just van Rossum52e14d62002-12-30 22:08:05 +0000923
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 if (compress == 0) { /* data is not compressed */
925 data = PyBytes_FromStringAndSize(buf, data_size);
926 Py_DECREF(raw_data);
927 return data;
928 }
929
930 /* Decompress with zlib */
931 decompress = get_decompress_func();
932 if (decompress == NULL) {
933 PyErr_SetString(ZipImportError,
934 "can't decompress data; "
935 "zlib not available");
936 goto error;
937 }
938 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000939error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 Py_DECREF(raw_data);
941 return data;
Just van Rossum52e14d62002-12-30 22:08:05 +0000942}
943
/* Lenient date/time comparison function.  The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two timestamps are considered equal
   when they differ by at most one second.

   Returns 1 when |t1 - t2| <= 1, otherwise 0.  The difference is taken
   as "larger minus smaller" so the result does not depend on time_t
   being a signed type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? (t1 - t2) : (t2 - t1);

    return d <= 1;
}
956
957/* Given the contents of a .py[co] file in a buffer, unmarshal the data
958 and return the code object. Return None if it the magic word doesn't
959 match (we do this instead of raising an exception as we fall back
960 to .py if available and we don't want to mask other errors).
961 Returns a new reference. */
962static PyObject *
963unmarshal_code(char *pathname, PyObject *data, time_t mtime)
964{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 PyObject *code;
966 char *buf = PyBytes_AsString(data);
967 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 if (size <= 9) {
970 PyErr_SetString(ZipImportError,
971 "bad pyc data");
972 return NULL;
973 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
976 if (Py_VerboseFlag)
977 PySys_WriteStderr("# %s has bad magic\n",
978 pathname);
979 Py_INCREF(Py_None);
980 return Py_None; /* signal caller to try alternative */
981 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
984 mtime)) {
985 if (Py_VerboseFlag)
986 PySys_WriteStderr("# %s has bad mtime\n",
987 pathname);
988 Py_INCREF(Py_None);
989 return Py_None; /* signal caller to try alternative */
990 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
993 if (code == NULL)
994 return NULL;
995 if (!PyCode_Check(code)) {
996 Py_DECREF(code);
997 PyErr_Format(PyExc_TypeError,
998 "compiled module %.200s is not a code object",
999 pathname);
1000 return NULL;
1001 }
1002 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001003}
1004
1005/* Replace any occurances of "\r\n?" in the input string with "\n".
1006 This converts DOS and Mac line endings to Unix line endings.
1007 Also append a trailing "\n" to be compatible with
1008 PyParser_SimpleParseFile(). Returns a new reference. */
1009static PyObject *
1010normalize_line_endings(PyObject *source)
1011{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 char *buf, *q, *p = PyBytes_AsString(source);
1013 PyObject *fixed_source;
1014 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 if (!p) {
1017 return PyBytes_FromStringAndSize("\n\0", 2);
1018 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 /* one char extra for trailing \n and one for terminating \0 */
1021 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
1022 if (buf == NULL) {
1023 PyErr_SetString(PyExc_MemoryError,
1024 "zipimport: no memory to allocate "
1025 "source buffer");
1026 return NULL;
1027 }
1028 /* replace "\r\n?" by "\n" */
1029 for (q = buf; *p != '\0'; p++) {
1030 if (*p == '\r') {
1031 *q++ = '\n';
1032 if (*(p + 1) == '\n')
1033 p++;
1034 }
1035 else
1036 *q++ = *p;
1037 len++;
1038 }
1039 *q++ = '\n'; /* add trailing \n */
1040 *q = '\0';
1041 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
1042 PyMem_Free(buf);
1043 return fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001044}
1045
1046/* Given a string buffer containing Python source code, compile it
1047 return and return a code object as a new reference. */
1048static PyObject *
1049compile_source(char *pathname, PyObject *source)
1050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 PyObject *code, *fixed_source;
Just van Rossum52e14d62002-12-30 22:08:05 +00001052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 fixed_source = normalize_line_endings(source);
1054 if (fixed_source == NULL)
1055 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
1058 Py_file_input);
1059 Py_DECREF(fixed_source);
1060 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001061}
1062
/* Convert the packed DOS time and date words found in the Zip archive
   into a time_t that can be compared with the time stamp stored in
   .pyc files.  The fields are unpacked into a struct tm and converted
   with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* time word: hhhhh mmmmmm sssss (seconds stored halved) */
    int half_seconds = dostime & 0x1f;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    /* date word: yyyyyyy mmmm ddddd (years counted from 1980) */
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;
    struct tm stm;

    memset((void *) &stm, '\0', sizeof(stm));

    stm.tm_year = years_since_1980 + 80;    /* tm_year counts from 1900 */
    stm.tm_mon = month - 1;                 /* tm_mon is zero-based */
    stm.tm_mday = day;
    stm.tm_hour = hours;
    stm.tm_min = minutes;
    stm.tm_sec = half_seconds * 2;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1082
1083/* Given a path to a .pyc or .pyo file in the archive, return the
1084 modifictaion time of the matching .py file, or 0 if no source
1085 is available. */
1086static time_t
1087get_mtime_of_source(ZipImporter *self, char *path)
1088{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 PyObject *toc_entry;
1090 time_t mtime = 0;
1091 Py_ssize_t lastchar = strlen(path) - 1;
1092 char savechar = path[lastchar];
1093 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1094 toc_entry = PyDict_GetItemString(self->files, path);
1095 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1096 PyTuple_Size(toc_entry) == 8) {
1097 /* fetch the time stamp of the .py file for comparison
1098 with an embedded pyc time stamp */
1099 int time, date;
1100 time = PyLong_AsLong(PyTuple_GetItem(toc_entry, 5));
1101 date = PyLong_AsLong(PyTuple_GetItem(toc_entry, 6));
1102 mtime = parse_dostime(time, date);
1103 }
1104 path[lastchar] = savechar;
1105 return mtime;
Just van Rossum52e14d62002-12-30 22:08:05 +00001106}
1107
1108/* Return the code object for the module named by 'fullname' from the
1109 Zip archive as a new reference. */
1110static PyObject *
1111get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 time_t mtime, PyObject *toc_entry)
Just van Rossum52e14d62002-12-30 22:08:05 +00001113{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 PyObject *data, *code;
1115 char *modpath;
Just van Rossum52e14d62002-12-30 22:08:05 +00001116
Victor Stinner60fe8d92010-08-16 23:48:11 +00001117 data = get_data(self->archive, toc_entry);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (data == NULL)
1119 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001120
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0));
Just van Rossum52e14d62002-12-30 22:08:05 +00001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 if (isbytecode) {
1124 code = unmarshal_code(modpath, data, mtime);
1125 }
1126 else {
1127 code = compile_source(modpath, data);
1128 }
1129 Py_DECREF(data);
1130 return code;
Just van Rossum52e14d62002-12-30 22:08:05 +00001131}
1132
1133/* Get the code object assoiciated with the module specified by
1134 'fullname'. */
1135static PyObject *
1136get_module_code(ZipImporter *self, char *fullname,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 int *p_ispackage, char **p_modpath)
Just van Rossum52e14d62002-12-30 22:08:05 +00001138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 PyObject *toc_entry;
1140 char *subname, path[MAXPATHLEN + 1];
1141 int len;
1142 struct st_zip_searchorder *zso;
Just van Rossum52e14d62002-12-30 22:08:05 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 subname = get_subname(fullname);
Just van Rossum52e14d62002-12-30 22:08:05 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 len = make_filename(_PyUnicode_AsString(self->prefix), subname, path);
1147 if (len < 0)
1148 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 for (zso = zip_searchorder; *zso->suffix; zso++) {
1151 PyObject *code = NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 strcpy(path + len, zso->suffix);
1154 if (Py_VerboseFlag > 1)
1155 PySys_WriteStderr("# trying %s%c%s\n",
1156 _PyUnicode_AsString(self->archive),
1157 (int)SEP, path);
1158 toc_entry = PyDict_GetItemString(self->files, path);
1159 if (toc_entry != NULL) {
1160 time_t mtime = 0;
1161 int ispackage = zso->type & IS_PACKAGE;
1162 int isbytecode = zso->type & IS_BYTECODE;
Just van Rossum52e14d62002-12-30 22:08:05 +00001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 if (isbytecode)
1165 mtime = get_mtime_of_source(self, path);
1166 if (p_ispackage != NULL)
1167 *p_ispackage = ispackage;
1168 code = get_code_from_data(self, ispackage,
1169 isbytecode, mtime,
1170 toc_entry);
1171 if (code == Py_None) {
1172 /* bad magic number or non-matching mtime
1173 in byte code, try next */
1174 Py_DECREF(code);
1175 continue;
1176 }
1177 if (code != NULL && p_modpath != NULL)
1178 *p_modpath = _PyUnicode_AsString(
1179 PyTuple_GetItem(toc_entry, 0));
1180 return code;
1181 }
1182 }
1183 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1184 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001185}
1186
1187
1188/* Module init */
1189
/* Module-level documentation string; it is passed as the doc field of
   the zipimportmodule definition that follows. */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
 subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
 info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1203
/* Module definition handed to PyModule_Create() in PyInit_zipimport().
   NOTE(review): the slot meanings marked "presumably" below follow the
   usual PyModuleDef field order -- confirm against the headers in use. */
static struct PyModuleDef zipimportmodule = {
    PyModuleDef_HEAD_INIT,
    "zipimport",        /* module name */
    zipimport_doc,      /* module documentation string */
    -1,                 /* presumably the per-module state size */
    NULL,               /* presumably the method table: none */
    NULL,               /* remaining slots are all unused */
    NULL,
    NULL,
    NULL
};
1215
Just van Rossum52e14d62002-12-30 22:08:05 +00001216PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001217PyInit_zipimport(void)
Just van Rossum52e14d62002-12-30 22:08:05 +00001218{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 PyObject *mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 if (PyType_Ready(&ZipImporter_Type) < 0)
1222 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 /* Correct directory separator */
1225 zip_searchorder[0].suffix[0] = SEP;
1226 zip_searchorder[1].suffix[0] = SEP;
1227 zip_searchorder[2].suffix[0] = SEP;
1228 if (Py_OptimizeFlag) {
1229 /* Reverse *.pyc and *.pyo */
1230 struct st_zip_searchorder tmp;
1231 tmp = zip_searchorder[0];
1232 zip_searchorder[0] = zip_searchorder[1];
1233 zip_searchorder[1] = tmp;
1234 tmp = zip_searchorder[3];
1235 zip_searchorder[3] = zip_searchorder[4];
1236 zip_searchorder[4] = tmp;
1237 }
Just van Rossum52e14d62002-12-30 22:08:05 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 mod = PyModule_Create(&zipimportmodule);
1240 if (mod == NULL)
1241 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001242
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1244 PyExc_ImportError, NULL);
1245 if (ZipImportError == NULL)
1246 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 Py_INCREF(ZipImportError);
1249 if (PyModule_AddObject(mod, "ZipImportError",
1250 ZipImportError) < 0)
1251 return NULL;
Just van Rossum52e14d62002-12-30 22:08:05 +00001252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 Py_INCREF(&ZipImporter_Type);
1254 if (PyModule_AddObject(mod, "zipimporter",
1255 (PyObject *)&ZipImporter_Type) < 0)
1256 return NULL;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001257
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 zip_directory_cache = PyDict_New();
1259 if (zip_directory_cache == NULL)
1260 return NULL;
1261 Py_INCREF(zip_directory_cache);
1262 if (PyModule_AddObject(mod, "_zip_directory_cache",
1263 zip_directory_cache) < 0)
1264 return NULL;
1265 return mod;
Just van Rossum52e14d62002-12-30 22:08:05 +00001266}