blob: fc93011eb79ff8ab2ec9db18db1932e15319744c [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix together with the IS_* flags that
   describe what a hit on that suffix means. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* zip_searchorder lists the suffixes tried, in order, when looking for
   a module inside the Zip archive: the package __init__ variants come
   first, followed by the plain-module .pyc, .pyo and .py entries.
   initzipimport() swaps the .pyc and .pyo entries when running in
   optimized mode and replaces '/' by SEP.  The entry with the empty
   suffix terminates the table. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",  IS_PACKAGE | IS_SOURCE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000095#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000096 struct stat statbuf;
97 int rv;
98
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
106 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000107#else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
114 }
115#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000116 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000117 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
124 }
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
135 }
136 else
137 Py_INCREF(files);
138 self->files = files;
139 }
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
143 }
144
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
154 }
155 }
156
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
160
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
164
165 return 0;
166}
167
168/* GC support. */
169static int
170zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171{
172 ZipImporter *self = (ZipImporter *)obj;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000173 Py_VISIT(self->files);
Just van Rossum52e14d62002-12-30 22:08:05 +0000174 return 0;
175}
176
177static void
178zipimporter_dealloc(ZipImporter *self)
179{
180 PyObject_GC_UnTrack(self);
181 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000182 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000183 Py_XDECREF(self->files);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000184 Py_Type(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000185}
186
187static PyObject *
188zipimporter_repr(ZipImporter *self)
189{
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 char *archive = "???";
191 char *prefix = "";
192
193 if (self->archive != NULL && PyString_Check(self->archive))
194 archive = PyString_AsString(self->archive);
195 if (self->prefix != NULL && PyString_Check(self->prefix))
196 prefix = PyString_AsString(self->prefix);
197 if (prefix != NULL && *prefix)
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000198 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
199 archive, SEP, prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000200 else
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000201 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
202 archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000203}
204
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   nothing is copied. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
216
217/* Given a (sub)modulename, write the potential file path in the
218 archive (without extension) to the path buffer. Return the
219 length of the resulting string. */
220static int
221make_filename(char *prefix, char *name, char *path)
222{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000223 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000224 char *p;
225
226 len = strlen(prefix);
227
228 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
229 if (len + strlen(name) + 13 >= MAXPATHLEN) {
230 PyErr_SetString(ZipImportError, "path too long");
231 return -1;
232 }
233
234 strcpy(path, prefix);
235 strcpy(path + len, name);
236 for (p = path + len; *p; p++) {
237 if (*p == '.')
238 *p = SEP;
239 }
240 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000242 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000243}
244
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
251
252/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000253static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000254get_module_info(ZipImporter *self, char *fullname)
255{
256 char *subname, path[MAXPATHLEN + 1];
257 int len;
258 struct st_zip_searchorder *zso;
259
260 subname = get_subname(fullname);
261
262 len = make_filename(PyString_AsString(self->prefix), subname, path);
263 if (len < 0)
264 return MI_ERROR;
265
266 for (zso = zip_searchorder; *zso->suffix; zso++) {
267 strcpy(path + len, zso->suffix);
268 if (PyDict_GetItemString(self->files, path) != NULL) {
269 if (zso->type & IS_PACKAGE)
270 return MI_PACKAGE;
271 else
272 return MI_MODULE;
273 }
274 }
275 return MI_NOT_FOUND;
276}
277
278/* Check whether we can satisfy the import of the module named by
279 'fullname'. Return self if we can, None if we can't. */
280static PyObject *
281zipimporter_find_module(PyObject *obj, PyObject *args)
282{
283 ZipImporter *self = (ZipImporter *)obj;
284 PyObject *path = NULL;
285 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000286 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000287
288 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
289 &fullname, &path))
290 return NULL;
291
292 mi = get_module_info(self, fullname);
293 if (mi == MI_ERROR)
294 return NULL;
295 if (mi == MI_NOT_FOUND) {
296 Py_INCREF(Py_None);
297 return Py_None;
298 }
299 Py_INCREF(self);
300 return (PyObject *)self;
301}
302
303/* Load and return the module named by 'fullname'. */
304static PyObject *
305zipimporter_load_module(PyObject *obj, PyObject *args)
306{
307 ZipImporter *self = (ZipImporter *)obj;
308 PyObject *code, *mod, *dict;
309 char *fullname, *modpath;
310 int ispackage;
311
312 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
313 &fullname))
314 return NULL;
315
316 code = get_module_code(self, fullname, &ispackage, &modpath);
317 if (code == NULL)
318 return NULL;
319
320 mod = PyImport_AddModule(fullname);
321 if (mod == NULL) {
322 Py_DECREF(code);
323 return NULL;
324 }
325 dict = PyModule_GetDict(mod);
326
327 /* mod.__loader__ = self */
328 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
329 goto error;
330
331 if (ispackage) {
332 /* add __path__ to the module *before* the code gets
333 executed */
334 PyObject *pkgpath, *fullpath;
335 char *prefix = PyString_AsString(self->prefix);
336 char *subname = get_subname(fullname);
337 int err;
338
339 fullpath = PyString_FromFormat("%s%c%s%s",
340 PyString_AsString(self->archive),
341 SEP,
342 *prefix ? prefix : "",
343 subname);
344 if (fullpath == NULL)
345 goto error;
346
347 pkgpath = Py_BuildValue("[O]", fullpath);
348 Py_DECREF(fullpath);
349 if (pkgpath == NULL)
350 goto error;
351 err = PyDict_SetItemString(dict, "__path__", pkgpath);
352 Py_DECREF(pkgpath);
353 if (err != 0)
354 goto error;
355 }
356 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
357 Py_DECREF(code);
358 if (Py_VerboseFlag)
359 PySys_WriteStderr("import %s # loaded from Zip %s\n",
360 fullname, modpath);
361 return mod;
362error:
363 Py_DECREF(code);
364 Py_DECREF(mod);
365 return NULL;
366}
367
368/* Return a bool signifying whether the module is a package or not. */
369static PyObject *
370zipimporter_is_package(PyObject *obj, PyObject *args)
371{
372 ZipImporter *self = (ZipImporter *)obj;
373 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000374 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000375
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000376 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000377 &fullname))
378 return NULL;
379
380 mi = get_module_info(self, fullname);
381 if (mi == MI_ERROR)
382 return NULL;
383 if (mi == MI_NOT_FOUND) {
384 PyErr_Format(ZipImportError, "can't find module '%.200s'",
385 fullname);
386 return NULL;
387 }
388 return PyBool_FromLong(mi == MI_PACKAGE);
389}
390
391static PyObject *
392zipimporter_get_data(PyObject *obj, PyObject *args)
393{
394 ZipImporter *self = (ZipImporter *)obj;
395 char *path;
396#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000397 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000398#endif
399 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000400 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000401
402 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
403 return NULL;
404
405#ifdef ALTSEP
406 if (strlen(path) >= MAXPATHLEN) {
407 PyErr_SetString(ZipImportError, "path too long");
408 return NULL;
409 }
410 strcpy(buf, path);
411 for (p = buf; *p; p++) {
412 if (*p == ALTSEP)
413 *p = SEP;
414 }
415 path = buf;
416#endif
417 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000418 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000419 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
420 path[len] == SEP) {
421 path = path + len + 1;
422 }
423
424 toc_entry = PyDict_GetItemString(self->files, path);
425 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000426 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000427 return NULL;
428 }
429 return get_data(PyString_AsString(self->archive), toc_entry);
430}
431
432static PyObject *
433zipimporter_get_code(PyObject *obj, PyObject *args)
434{
435 ZipImporter *self = (ZipImporter *)obj;
436 char *fullname;
437
438 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
439 return NULL;
440
441 return get_module_code(self, fullname, NULL, NULL);
442}
443
444static PyObject *
445zipimporter_get_source(PyObject *obj, PyObject *args)
446{
447 ZipImporter *self = (ZipImporter *)obj;
448 PyObject *toc_entry;
449 char *fullname, *subname, path[MAXPATHLEN+1];
450 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000451 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000452
453 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
454 return NULL;
455
456 mi = get_module_info(self, fullname);
457 if (mi == MI_ERROR)
458 return NULL;
459 if (mi == MI_NOT_FOUND) {
460 PyErr_Format(ZipImportError, "can't find module '%.200s'",
461 fullname);
462 return NULL;
463 }
464 subname = get_subname(fullname);
465
466 len = make_filename(PyString_AsString(self->prefix), subname, path);
467 if (len < 0)
468 return NULL;
469
470 if (mi == MI_PACKAGE) {
471 path[len] = SEP;
472 strcpy(path + len + 1, "__init__.py");
473 }
474 else
475 strcpy(path + len, ".py");
476
477 toc_entry = PyDict_GetItemString(self->files, path);
Guido van Rossumad8d3002007-08-03 18:40:49 +0000478 if (toc_entry != NULL) {
479 PyObject *bytes = get_data(PyString_AsString(self->archive), toc_entry);
480 PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
481 Py_XDECREF(bytes);
482 return res;
483 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000484
485 /* we have the module, but no source */
486 Py_INCREF(Py_None);
487 return Py_None;
488}
489
490PyDoc_STRVAR(doc_find_module,
491"find_module(fullname, path=None) -> self or None.\n\
492\n\
493Search for a module specified by 'fullname'. 'fullname' must be the\n\
494fully qualified (dotted) module name. It returns the zipimporter\n\
495instance itself if the module was found, or None if it wasn't.\n\
496The optional 'path' argument is ignored -- it's there for compatibility\n\
497with the importer protocol.");
498
499PyDoc_STRVAR(doc_load_module,
500"load_module(fullname) -> module.\n\
501\n\
502Load the module specified by 'fullname'. 'fullname' must be the\n\
503fully qualified (dotted) module name. It returns the imported\n\
504module, or raises ZipImportError if it wasn't found.");
505
506PyDoc_STRVAR(doc_get_data,
507"get_data(pathname) -> string with file data.\n\
508\n\
509Return the data associated with 'pathname'. Raise IOError if\n\
510the file wasn't found.");
511
512PyDoc_STRVAR(doc_is_package,
513"is_package(fullname) -> bool.\n\
514\n\
515Return True if the module specified by fullname is a package.\n\
516Raise ZipImportError is the module couldn't be found.");
517
518PyDoc_STRVAR(doc_get_code,
519"get_code(fullname) -> code object.\n\
520\n\
521Return the code object for the specified module. Raise ZipImportError\n\
522is the module couldn't be found.");
523
524PyDoc_STRVAR(doc_get_source,
525"get_source(fullname) -> source string.\n\
526\n\
527Return the source code for the specified module. Raise ZipImportError\n\
528is the module couldn't be found, return None if the archive does\n\
529contain the module, but has no source for it.");
530
531static PyMethodDef zipimporter_methods[] = {
532 {"find_module", zipimporter_find_module, METH_VARARGS,
533 doc_find_module},
534 {"load_module", zipimporter_load_module, METH_VARARGS,
535 doc_load_module},
536 {"get_data", zipimporter_get_data, METH_VARARGS,
537 doc_get_data},
538 {"get_code", zipimporter_get_code, METH_VARARGS,
539 doc_get_code},
540 {"get_source", zipimporter_get_source, METH_VARARGS,
541 doc_get_source},
542 {"is_package", zipimporter_is_package, METH_VARARGS,
543 doc_is_package},
544 {NULL, NULL} /* sentinel */
545};
546
547static PyMemberDef zipimporter_members[] = {
548 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
549 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
550 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
551 {NULL}
552};
553
554PyDoc_STRVAR(zipimporter_doc,
555"zipimporter(archivepath) -> zipimporter object\n\
556\n\
557Create a new zipimporter instance. 'archivepath' must be a path to\n\
558a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
559a valid Zip archive.");
560
561#define DEFERRED_ADDRESS(ADDR) 0
562
563static PyTypeObject ZipImporter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000564 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
Just van Rossum52e14d62002-12-30 22:08:05 +0000565 "zipimport.zipimporter",
566 sizeof(ZipImporter),
567 0, /* tp_itemsize */
568 (destructor)zipimporter_dealloc, /* tp_dealloc */
569 0, /* tp_print */
570 0, /* tp_getattr */
571 0, /* tp_setattr */
572 0, /* tp_compare */
573 (reprfunc)zipimporter_repr, /* tp_repr */
574 0, /* tp_as_number */
575 0, /* tp_as_sequence */
576 0, /* tp_as_mapping */
577 0, /* tp_hash */
578 0, /* tp_call */
579 0, /* tp_str */
580 PyObject_GenericGetAttr, /* tp_getattro */
581 0, /* tp_setattro */
582 0, /* tp_as_buffer */
583 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
584 Py_TPFLAGS_HAVE_GC, /* tp_flags */
585 zipimporter_doc, /* tp_doc */
586 zipimporter_traverse, /* tp_traverse */
587 0, /* tp_clear */
588 0, /* tp_richcompare */
589 0, /* tp_weaklistoffset */
590 0, /* tp_iter */
591 0, /* tp_iternext */
592 zipimporter_methods, /* tp_methods */
593 zipimporter_members, /* tp_members */
594 0, /* tp_getset */
595 0, /* tp_base */
596 0, /* tp_dict */
597 0, /* tp_descr_get */
598 0, /* tp_descr_set */
599 0, /* tp_dictoffset */
600 (initproc)zipimporter_init, /* tp_init */
601 PyType_GenericAlloc, /* tp_alloc */
602 PyType_GenericNew, /* tp_new */
603 PyObject_GC_Del, /* tp_free */
604};
605
606
607/* implementation */
608
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian, interpreted as a signed 32-bit
   value (so 0xFFFFFFFF yields -1 whatever the width of long).  This
   partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic so that no bit is
   ever shifted into the sign position of a signed long, and the sign
   is applied arithmetically instead of depending on SIZEOF_LONG. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;
    long x;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    x = (long)(bits & 0x7FFFFFFFUL);
    if (bits & 0x80000000UL)
        /* subtract 2**31 in two steps so a 32-bit long cannot
           overflow */
        x = x - 0x7FFFFFFFL - 1;
    return x;
}
625
626/*
627 read_directory(archive) -> files dict (new reference)
628
629 Given a path to a Zip archive, build a dict, mapping file names
630 (local to the archive, using SEP as a separator) to toc entries.
631
632 A toc_entry is a tuple:
633
Fred Drakef5b7fd22005-11-11 19:34:56 +0000634 (__file__, # value to use for __file__, available for all files
635 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000636 data_size, # size of compressed data on disk
637 file_size, # size of decompressed data
638 file_offset, # offset of file header from start of archive
639 time, # mod time of file (in dos format)
640 date, # mod data of file (in dos format)
641 crc, # crc checksum of the data
642 )
643
644 Directories can be recognized by the trailing SEP in the name,
645 data_size and file_offset are 0.
646*/
647static PyObject *
648read_directory(char *archive)
649{
650 PyObject *files = NULL;
651 FILE *fp;
652 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000653 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000654 long i, l, count;
655 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000656 char path[MAXPATHLEN + 5];
657 char name[MAXPATHLEN + 5];
658 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000659 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000660
661 if (strlen(archive) > MAXPATHLEN) {
662 PyErr_SetString(PyExc_OverflowError,
663 "Zip path name is too long");
664 return NULL;
665 }
666 strcpy(path, archive);
667
668 fp = fopen(archive, "rb");
669 if (fp == NULL) {
670 PyErr_Format(ZipImportError, "can't open Zip file: "
671 "'%.200s'", archive);
672 return NULL;
673 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000674 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000675 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000676 if (fread(endof_central_dir, 1, 22, fp) != 22) {
677 fclose(fp);
678 PyErr_Format(ZipImportError, "can't read Zip file: "
679 "'%.200s'", archive);
680 return NULL;
681 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000682 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000683 /* Bad: End of Central Dir signature */
684 fclose(fp);
685 PyErr_Format(ZipImportError, "not a Zip file: "
686 "'%.200s'", archive);
687 return NULL;
688 }
689
Thomas Heller354e3d92003-07-22 18:10:15 +0000690 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000691 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000692 arc_offset = header_position - header_offset - header_size;
693 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000694
695 files = PyDict_New();
696 if (files == NULL)
697 goto error;
698
699 length = (long)strlen(path);
700 path[length] = SEP;
701
702 /* Start of Central Directory */
703 count = 0;
704 for (;;) {
705 PyObject *t;
706 int err;
707
708 fseek(fp, header_offset, 0); /* Start of file header */
709 l = PyMarshal_ReadLongFromFile(fp);
710 if (l != 0x02014B50)
711 break; /* Bad: Central Dir File Header */
712 fseek(fp, header_offset + 10, 0);
713 compress = PyMarshal_ReadShortFromFile(fp);
714 time = PyMarshal_ReadShortFromFile(fp);
715 date = PyMarshal_ReadShortFromFile(fp);
716 crc = PyMarshal_ReadLongFromFile(fp);
717 data_size = PyMarshal_ReadLongFromFile(fp);
718 file_size = PyMarshal_ReadLongFromFile(fp);
719 name_size = PyMarshal_ReadShortFromFile(fp);
720 header_size = 46 + name_size +
721 PyMarshal_ReadShortFromFile(fp) +
722 PyMarshal_ReadShortFromFile(fp);
723 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000724 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000725 if (name_size > MAXPATHLEN)
726 name_size = MAXPATHLEN;
727
728 p = name;
729 for (i = 0; i < name_size; i++) {
730 *p = (char)getc(fp);
731 if (*p == '/')
732 *p = SEP;
733 p++;
734 }
735 *p = 0; /* Add terminating null byte */
736 header_offset += header_size;
737
738 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
739
740 t = Py_BuildValue("siiiiiii", path, compress, data_size,
741 file_size, file_offset, time, date, crc);
742 if (t == NULL)
743 goto error;
744 err = PyDict_SetItemString(files, name, t);
745 Py_DECREF(t);
746 if (err != 0)
747 goto error;
748 count++;
749 }
750 fclose(fp);
751 if (Py_VerboseFlag)
752 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
753 count, archive);
754 return files;
755error:
756 fclose(fp);
757 Py_XDECREF(files);
758 return NULL;
759}
760
761/* Return the zlib.decompress function object, or NULL if zlib couldn't
762 be imported. The function is cached when found, so subsequent calls
763 don't import zlib again. Returns a *borrowed* reference.
764 XXX This makes zlib.decompress immortal. */
765static PyObject *
766get_decompress_func(void)
767{
768 static PyObject *decompress = NULL;
769
770 if (decompress == NULL) {
771 PyObject *zlib;
772 static int importing_zlib = 0;
773
774 if (importing_zlib != 0)
775 /* Someone has a zlib.py[co] in their Zip file;
776 let's avoid a stack overflow. */
777 return NULL;
778 importing_zlib = 1;
779 zlib = PyImport_ImportModule("zlib"); /* import zlib */
780 importing_zlib = 0;
781 if (zlib != NULL) {
782 decompress = PyObject_GetAttrString(zlib,
783 "decompress");
784 Py_DECREF(zlib);
785 }
786 else
787 PyErr_Clear();
788 if (Py_VerboseFlag)
789 PySys_WriteStderr("# zipimport: zlib %s\n",
790 zlib != NULL ? "available": "UNAVAILABLE");
791 }
792 return decompress;
793}
794
795/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
796 data as a new reference. */
797static PyObject *
798get_data(char *archive, PyObject *toc_entry)
799{
800 PyObject *raw_data, *data = NULL, *decompress;
801 char *buf;
802 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000803 int err;
804 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000805 long l;
806 char *datapath;
807 long compress, data_size, file_size, file_offset;
808 long time, date, crc;
809
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000810 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000811 &data_size, &file_size, &file_offset, &time,
812 &date, &crc)) {
813 return NULL;
814 }
815
816 fp = fopen(archive, "rb");
817 if (!fp) {
818 PyErr_Format(PyExc_IOError,
819 "zipimport: can not open file %s", archive);
820 return NULL;
821 }
822
823 /* Check to make sure the local file header is correct */
824 fseek(fp, file_offset, 0);
825 l = PyMarshal_ReadLongFromFile(fp);
826 if (l != 0x04034B50) {
827 /* Bad: Local File Header */
828 PyErr_Format(ZipImportError,
829 "bad local file header in %s",
830 archive);
831 fclose(fp);
832 return NULL;
833 }
834 fseek(fp, file_offset + 26, 0);
835 l = 30 + PyMarshal_ReadShortFromFile(fp) +
836 PyMarshal_ReadShortFromFile(fp); /* local header size */
837 file_offset += l; /* Start of file data */
838
839 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
840 data_size : data_size + 1);
841 if (raw_data == NULL) {
842 fclose(fp);
843 return NULL;
844 }
845 buf = PyString_AsString(raw_data);
846
847 err = fseek(fp, file_offset, 0);
848 if (err == 0)
849 bytes_read = fread(buf, 1, data_size, fp);
850 fclose(fp);
851 if (err || bytes_read != data_size) {
852 PyErr_SetString(PyExc_IOError,
853 "zipimport: can't read data");
854 Py_DECREF(raw_data);
855 return NULL;
856 }
857
858 if (compress != 0) {
859 buf[data_size] = 'Z'; /* saw this in zipfile.py */
860 data_size++;
861 }
862 buf[data_size] = '\0';
863
Guido van Rossumad8d3002007-08-03 18:40:49 +0000864 if (compress == 0) { /* data is not compressed */
865 raw_data = PyBytes_FromStringAndSize(buf, data_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000866 return raw_data;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000867 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000868
869 /* Decompress with zlib */
870 decompress = get_decompress_func();
871 if (decompress == NULL) {
872 PyErr_SetString(ZipImportError,
873 "can't decompress data; "
874 "zlib not available");
875 goto error;
876 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000877 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000878error:
879 Py_DECREF(raw_data);
880 return data;
881}
882
/* Lenient date/time comparison.  The mtime kept in the archive is less
   precise than the one stored in a .pyc (dostime only stores even
   seconds), so two timestamps count as equal when they differ by at
   most one second.  Returns 1 for "equal", 0 otherwise. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
895
896/* Given the contents of a .py[co] file in a buffer, unmarshal the data
897 and return the code object. Return None if it the magic word doesn't
898 match (we do this instead of raising an exception as we fall back
899 to .py if available and we don't want to mask other errors).
900 Returns a new reference. */
901static PyObject *
902unmarshal_code(char *pathname, PyObject *data, time_t mtime)
903{
904 PyObject *code;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000905 char *buf = PyBytes_AsString(data);
906 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000907
908 if (size <= 9) {
909 PyErr_SetString(ZipImportError,
910 "bad pyc data");
911 return NULL;
912 }
913
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000914 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000915 if (Py_VerboseFlag)
916 PySys_WriteStderr("# %s has bad magic\n",
917 pathname);
918 Py_INCREF(Py_None);
919 return Py_None; /* signal caller to try alternative */
920 }
921
Just van Rossum9a3129c2003-01-03 11:18:56 +0000922 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
923 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000924 if (Py_VerboseFlag)
925 PySys_WriteStderr("# %s has bad mtime\n",
926 pathname);
927 Py_INCREF(Py_None);
928 return Py_None; /* signal caller to try alternative */
929 }
930
931 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
932 if (code == NULL)
933 return NULL;
934 if (!PyCode_Check(code)) {
935 Py_DECREF(code);
936 PyErr_Format(PyExc_TypeError,
937 "compiled module %.200s is not a code object",
938 pathname);
939 return NULL;
940 }
941 return code;
942}
943
944/* Replace any occurances of "\r\n?" in the input string with "\n".
945 This converts DOS and Mac line endings to Unix line endings.
946 Also append a trailing "\n" to be compatible with
947 PyParser_SimpleParseFile(). Returns a new reference. */
948static PyObject *
949normalize_line_endings(PyObject *source)
950{
Guido van Rossumad8d3002007-08-03 18:40:49 +0000951 char *buf, *q, *p = PyBytes_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000952 PyObject *fixed_source;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000953 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000954
Guido van Rossumad8d3002007-08-03 18:40:49 +0000955 if (!p) {
956 return PyBytes_FromStringAndSize("\n\0", 2);
957 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000958
Just van Rossum9a3129c2003-01-03 11:18:56 +0000959 /* one char extra for trailing \n and one for terminating \0 */
Guido van Rossumad8d3002007-08-03 18:40:49 +0000960 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 if (buf == NULL) {
962 PyErr_SetString(PyExc_MemoryError,
963 "zipimport: no memory to allocate "
964 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000966 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000968 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 if (*p == '\r') {
970 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 }
974 else
975 *q++ = *p;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000976 len++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000977 }
978 *q++ = '\n'; /* add trailing \n */
979 *q = '\0';
Guido van Rossumad8d3002007-08-03 18:40:49 +0000980 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000981 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000982 return fixed_source;
983}
984
985/* Given a string buffer containing Python source code, compile it
986 return and return a code object as a new reference. */
987static PyObject *
988compile_source(char *pathname, PyObject *source)
989{
990 PyObject *code, *fixed_source;
991
992 fixed_source = normalize_line_endings(source);
993 if (fixed_source == NULL)
994 return NULL;
995
Guido van Rossumad8d3002007-08-03 18:40:49 +0000996 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
Just van Rossum52e14d62002-12-30 22:08:05 +0000997 Py_file_input);
998 Py_DECREF(fixed_source);
999 return code;
1000}
1001
/* Turn the packed DOS time and date words stored in a Zip archive into
   a time_t (via mktime(), i.e. interpreted as local time) so that it
   can be compared with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
            since 1980. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    int seconds = (dostime & 0x1f) * 2;
    int minutes = (dostime >> 5) & 0x3f;
    int hours = (dostime >> 11) & 0x1f;
    int day = dosdate & 0x1f;
    int month = (dosdate >> 5) & 0x0f;
    int years_since_1980 = (dosdate >> 9) & 0x7f;
    struct tm when;

    when.tm_year = years_since_1980 + 80;  /* struct tm counts from 1900 */
    when.tm_mon = month - 1;               /* struct tm months are 0-based */
    when.tm_mday = day;
    when.tm_hour = hours;
    when.tm_min = minutes;
    when.tm_sec = seconds;
    when.tm_isdst = -1;  /* let mktime() decide; wday/yday are ignored */

    return mktime(&when);
}
1019
1020/* Given a path to a .pyc or .pyo file in the archive, return the
1021 modifictaion time of the matching .py file, or 0 if no source
1022 is available. */
1023static time_t
1024get_mtime_of_source(ZipImporter *self, char *path)
1025{
1026 PyObject *toc_entry;
1027 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001028 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001029 char savechar = path[lastchar];
1030 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1031 toc_entry = PyDict_GetItemString(self->files, path);
1032 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1033 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001034 /* fetch the time stamp of the .py file for comparison
1035 with an embedded pyc time stamp */
1036 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001037 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1038 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1039 mtime = parse_dostime(time, date);
1040 }
1041 path[lastchar] = savechar;
1042 return mtime;
1043}
1044
1045/* Return the code object for the module named by 'fullname' from the
1046 Zip archive as a new reference. */
1047static PyObject *
1048get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1049 time_t mtime, PyObject *toc_entry)
1050{
1051 PyObject *data, *code;
1052 char *modpath;
1053 char *archive = PyString_AsString(self->archive);
1054
1055 if (archive == NULL)
1056 return NULL;
1057
1058 data = get_data(archive, toc_entry);
1059 if (data == NULL)
1060 return NULL;
1061
1062 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1063
1064 if (isbytecode) {
1065 code = unmarshal_code(modpath, data, mtime);
1066 }
1067 else {
1068 code = compile_source(modpath, data);
1069 }
1070 Py_DECREF(data);
1071 return code;
1072}
1073
1074/* Get the code object assoiciated with the module specified by
1075 'fullname'. */
1076static PyObject *
1077get_module_code(ZipImporter *self, char *fullname,
1078 int *p_ispackage, char **p_modpath)
1079{
1080 PyObject *toc_entry;
1081 char *subname, path[MAXPATHLEN + 1];
1082 int len;
1083 struct st_zip_searchorder *zso;
1084
1085 subname = get_subname(fullname);
1086
1087 len = make_filename(PyString_AsString(self->prefix), subname, path);
1088 if (len < 0)
1089 return NULL;
1090
1091 for (zso = zip_searchorder; *zso->suffix; zso++) {
1092 PyObject *code = NULL;
1093
1094 strcpy(path + len, zso->suffix);
1095 if (Py_VerboseFlag > 1)
1096 PySys_WriteStderr("# trying %s%c%s\n",
1097 PyString_AsString(self->archive),
1098 SEP, path);
1099 toc_entry = PyDict_GetItemString(self->files, path);
1100 if (toc_entry != NULL) {
1101 time_t mtime = 0;
1102 int ispackage = zso->type & IS_PACKAGE;
1103 int isbytecode = zso->type & IS_BYTECODE;
1104
1105 if (isbytecode)
1106 mtime = get_mtime_of_source(self, path);
1107 if (p_ispackage != NULL)
1108 *p_ispackage = ispackage;
1109 code = get_code_from_data(self, ispackage,
1110 isbytecode, mtime,
1111 toc_entry);
1112 if (code == Py_None) {
1113 /* bad magic number or non-matching mtime
1114 in byte code, try next */
1115 Py_DECREF(code);
1116 continue;
1117 }
1118 if (code != NULL && p_modpath != NULL)
1119 *p_modpath = PyString_AsString(
1120 PyTuple_GetItem(toc_entry, 0));
1121 return code;
1122 }
1123 }
1124 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1125 return NULL;
1126}
1127
1128
1129/* Module init */
1130
/* Module-level documentation string; initzipimport() passes it to
   Py_InitModule4() as the module's doc argument.  The text is one
   string literal: each source line ends with an explicit "\n" followed
   by a backslash line continuation. */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
  subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
  info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1144
1145PyMODINIT_FUNC
1146initzipimport(void)
1147{
1148 PyObject *mod;
1149
1150 if (PyType_Ready(&ZipImporter_Type) < 0)
1151 return;
1152
1153 /* Correct directory separator */
1154 zip_searchorder[0].suffix[0] = SEP;
1155 zip_searchorder[1].suffix[0] = SEP;
1156 zip_searchorder[2].suffix[0] = SEP;
1157 if (Py_OptimizeFlag) {
1158 /* Reverse *.pyc and *.pyo */
1159 struct st_zip_searchorder tmp;
1160 tmp = zip_searchorder[0];
1161 zip_searchorder[0] = zip_searchorder[1];
1162 zip_searchorder[1] = tmp;
1163 tmp = zip_searchorder[3];
1164 zip_searchorder[3] = zip_searchorder[4];
1165 zip_searchorder[4] = tmp;
1166 }
1167
1168 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1169 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001170 if (mod == NULL)
1171 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001172
1173 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1174 PyExc_ImportError, NULL);
1175 if (ZipImportError == NULL)
1176 return;
1177
1178 Py_INCREF(ZipImportError);
1179 if (PyModule_AddObject(mod, "ZipImportError",
1180 ZipImportError) < 0)
1181 return;
1182
1183 Py_INCREF(&ZipImporter_Type);
1184 if (PyModule_AddObject(mod, "zipimporter",
1185 (PyObject *)&ZipImporter_Type) < 0)
1186 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001187
Just van Rossum52e14d62002-12-30 22:08:05 +00001188 zip_directory_cache = PyDict_New();
1189 if (zip_directory_cache == NULL)
1190 return;
1191 Py_INCREF(zip_directory_cache);
1192 if (PyModule_AddObject(mod, "_zip_directory_cache",
1193 zip_directory_cache) < 0)
1194 return;
1195}