blob: 183692cc2c70e6ef2e462764dc7f1ab3565e8e82 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the search-order table: a file name suffix together with
   the IS_* flags that describe what a file with that suffix contains. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
43static PyTypeObject ZipImporter_Type;
44static PyObject *ZipImportError;
45static PyObject *zip_directory_cache = NULL;
46
47/* forward decls */
48static PyObject *read_directory(char *archive);
49static PyObject *get_data(char *archive, PyObject *toc_entry);
50static PyObject *get_module_code(ZipImporter *self, char *fullname,
51 int *p_ispackage, char **p_modpath);
52
53
/* True if 'op' is a zipimporter instance (or an instance of a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck((op), &ZipImporter_Type)
55
56
57/* zipimporter.__init__
58 Split the "subdirectory" from the Zip archive path, lookup a matching
59 entry in sys.path_importer_cache, fetch the file directory from there
60 if found, or else read it from the archive. */
61static int
62zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63{
64 char *path, *p, *prefix, buf[MAXPATHLEN+2];
65 int len;
66
Georg Brandl02c42872005-08-26 06:42:30 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
69
Just van Rossum52e14d62002-12-30 22:08:05 +000070 if (!PyArg_ParseTuple(args, "s:zipimporter",
71 &path))
72 return -1;
73
74 len = strlen(path);
75 if (len == 0) {
76 PyErr_SetString(ZipImportError, "archive path is empty");
77 return -1;
78 }
79 if (len >= MAXPATHLEN) {
80 PyErr_SetString(ZipImportError,
81 "archive path too long");
82 return -1;
83 }
84 strcpy(buf, path);
85
86#ifdef ALTSEP
87 for (p = buf; *p; p++) {
88 if (*p == ALTSEP)
89 *p = SEP;
90 }
91#endif
92
93 path = NULL;
94 prefix = NULL;
95 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000096#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000097 struct stat statbuf;
98 int rv;
99
100 rv = stat(buf, &statbuf);
101 if (rv == 0) {
102 /* it exists */
103 if (S_ISREG(statbuf.st_mode))
104 /* it's a file */
105 path = buf;
106 break;
107 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000108#else
109 if (object_exists(buf)) {
110 /* it exists */
111 if (isfile(buf))
112 /* it's a file */
113 path = buf;
114 break;
115 }
116#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000118 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
124 prefix = p;
125 }
126 if (path != NULL) {
127 PyObject *files;
128 files = PyDict_GetItemString(zip_directory_cache, path);
129 if (files == NULL) {
130 files = read_directory(buf);
131 if (files == NULL)
132 return -1;
133 if (PyDict_SetItemString(zip_directory_cache, path,
134 files) != 0)
135 return -1;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140 }
141 else {
142 PyErr_SetString(ZipImportError, "not a Zip file");
143 return -1;
144 }
145
146 if (prefix == NULL)
147 prefix = "";
148 else {
149 prefix++;
150 len = strlen(prefix);
151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
155 }
156 }
157
158 self->archive = PyString_FromString(buf);
159 if (self->archive == NULL)
160 return -1;
161
162 self->prefix = PyString_FromString(prefix);
163 if (self->prefix == NULL)
164 return -1;
165
166 return 0;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173 ZipImporter *self = (ZipImporter *)obj;
174 int err;
175
176 if (self->files != NULL) {
177 err = visit(self->files, arg);
178 if (err)
179 return err;
180 }
181 return 0;
182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000189 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 Py_XDECREF(self->files);
191 self->ob_type->tp_free((PyObject *)self);
192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
197 char buf[500];
198 char *archive = "???";
199 char *prefix = "";
200
201 if (self->archive != NULL && PyString_Check(self->archive))
202 archive = PyString_AsString(self->archive);
203 if (self->prefix != NULL && PyString_Check(self->prefix))
204 prefix = PyString_AsString(self->prefix);
205 if (prefix != NULL && *prefix)
206 PyOS_snprintf(buf, sizeof(buf),
207 "<zipimporter object \"%.300s%c%.150s\">",
208 archive, SEP, prefix);
209 else
210 PyOS_snprintf(buf, sizeof(buf),
211 "<zipimporter object \"%.300s\">",
212 archive);
213 return PyString_FromString(buf);
214}
215
/* Return the part of 'fullname' after its last '.', or 'fullname'
   itself when it contains no dot -- i.e. fullname.split(".")[-1].
   The result points into the caller's string. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
227
228/* Given a (sub)modulename, write the potential file path in the
229 archive (without extension) to the path buffer. Return the
230 length of the resulting string. */
231static int
232make_filename(char *prefix, char *name, char *path)
233{
234 int len;
235 char *p;
236
237 len = strlen(prefix);
238
239 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
240 if (len + strlen(name) + 13 >= MAXPATHLEN) {
241 PyErr_SetString(ZipImportError, "path too long");
242 return -1;
243 }
244
245 strcpy(path, prefix);
246 strcpy(path + len, name);
247 for (p = path + len; *p; p++) {
248 if (*p == '.')
249 *p = SEP;
250 }
251 len += strlen(name);
252 return len;
253}
254
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found, plain module */
    MI_PACKAGE      /* found, package */
};
261
262/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000263static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000264get_module_info(ZipImporter *self, char *fullname)
265{
266 char *subname, path[MAXPATHLEN + 1];
267 int len;
268 struct st_zip_searchorder *zso;
269
270 subname = get_subname(fullname);
271
272 len = make_filename(PyString_AsString(self->prefix), subname, path);
273 if (len < 0)
274 return MI_ERROR;
275
276 for (zso = zip_searchorder; *zso->suffix; zso++) {
277 strcpy(path + len, zso->suffix);
278 if (PyDict_GetItemString(self->files, path) != NULL) {
279 if (zso->type & IS_PACKAGE)
280 return MI_PACKAGE;
281 else
282 return MI_MODULE;
283 }
284 }
285 return MI_NOT_FOUND;
286}
287
288/* Check whether we can satisfy the import of the module named by
289 'fullname'. Return self if we can, None if we can't. */
290static PyObject *
291zipimporter_find_module(PyObject *obj, PyObject *args)
292{
293 ZipImporter *self = (ZipImporter *)obj;
294 PyObject *path = NULL;
295 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000296 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297
298 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
299 &fullname, &path))
300 return NULL;
301
302 mi = get_module_info(self, fullname);
303 if (mi == MI_ERROR)
304 return NULL;
305 if (mi == MI_NOT_FOUND) {
306 Py_INCREF(Py_None);
307 return Py_None;
308 }
309 Py_INCREF(self);
310 return (PyObject *)self;
311}
312
313/* Load and return the module named by 'fullname'. */
314static PyObject *
315zipimporter_load_module(PyObject *obj, PyObject *args)
316{
317 ZipImporter *self = (ZipImporter *)obj;
318 PyObject *code, *mod, *dict;
319 char *fullname, *modpath;
320 int ispackage;
321
322 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
323 &fullname))
324 return NULL;
325
326 code = get_module_code(self, fullname, &ispackage, &modpath);
327 if (code == NULL)
328 return NULL;
329
330 mod = PyImport_AddModule(fullname);
331 if (mod == NULL) {
332 Py_DECREF(code);
333 return NULL;
334 }
335 dict = PyModule_GetDict(mod);
336
337 /* mod.__loader__ = self */
338 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
339 goto error;
340
341 if (ispackage) {
342 /* add __path__ to the module *before* the code gets
343 executed */
344 PyObject *pkgpath, *fullpath;
345 char *prefix = PyString_AsString(self->prefix);
346 char *subname = get_subname(fullname);
347 int err;
348
349 fullpath = PyString_FromFormat("%s%c%s%s",
350 PyString_AsString(self->archive),
351 SEP,
352 *prefix ? prefix : "",
353 subname);
354 if (fullpath == NULL)
355 goto error;
356
357 pkgpath = Py_BuildValue("[O]", fullpath);
358 Py_DECREF(fullpath);
359 if (pkgpath == NULL)
360 goto error;
361 err = PyDict_SetItemString(dict, "__path__", pkgpath);
362 Py_DECREF(pkgpath);
363 if (err != 0)
364 goto error;
365 }
366 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
367 Py_DECREF(code);
368 if (Py_VerboseFlag)
369 PySys_WriteStderr("import %s # loaded from Zip %s\n",
370 fullname, modpath);
371 return mod;
372error:
373 Py_DECREF(code);
374 Py_DECREF(mod);
375 return NULL;
376}
377
378/* Return a bool signifying whether the module is a package or not. */
379static PyObject *
380zipimporter_is_package(PyObject *obj, PyObject *args)
381{
382 ZipImporter *self = (ZipImporter *)obj;
383 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000384 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000385
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000386 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000387 &fullname))
388 return NULL;
389
390 mi = get_module_info(self, fullname);
391 if (mi == MI_ERROR)
392 return NULL;
393 if (mi == MI_NOT_FOUND) {
394 PyErr_Format(ZipImportError, "can't find module '%.200s'",
395 fullname);
396 return NULL;
397 }
398 return PyBool_FromLong(mi == MI_PACKAGE);
399}
400
401static PyObject *
402zipimporter_get_data(PyObject *obj, PyObject *args)
403{
404 ZipImporter *self = (ZipImporter *)obj;
405 char *path;
406#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000407 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000408#endif
409 PyObject *toc_entry;
410 int len;
411
412 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413 return NULL;
414
415#ifdef ALTSEP
416 if (strlen(path) >= MAXPATHLEN) {
417 PyErr_SetString(ZipImportError, "path too long");
418 return NULL;
419 }
420 strcpy(buf, path);
421 for (p = buf; *p; p++) {
422 if (*p == ALTSEP)
423 *p = SEP;
424 }
425 path = buf;
426#endif
427 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000428 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000429 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
430 path[len] == SEP) {
431 path = path + len + 1;
432 }
433
434 toc_entry = PyDict_GetItemString(self->files, path);
435 if (toc_entry == NULL) {
436 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
437 path);
438 return NULL;
439 }
440 return get_data(PyString_AsString(self->archive), toc_entry);
441}
442
443static PyObject *
444zipimporter_get_code(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 char *fullname;
448
449 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
450 return NULL;
451
452 return get_module_code(self, fullname, NULL, NULL);
453}
454
455static PyObject *
456zipimporter_get_source(PyObject *obj, PyObject *args)
457{
458 ZipImporter *self = (ZipImporter *)obj;
459 PyObject *toc_entry;
460 char *fullname, *subname, path[MAXPATHLEN+1];
461 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000462 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000463
464 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
465 return NULL;
466
467 mi = get_module_info(self, fullname);
468 if (mi == MI_ERROR)
469 return NULL;
470 if (mi == MI_NOT_FOUND) {
471 PyErr_Format(ZipImportError, "can't find module '%.200s'",
472 fullname);
473 return NULL;
474 }
475 subname = get_subname(fullname);
476
477 len = make_filename(PyString_AsString(self->prefix), subname, path);
478 if (len < 0)
479 return NULL;
480
481 if (mi == MI_PACKAGE) {
482 path[len] = SEP;
483 strcpy(path + len + 1, "__init__.py");
484 }
485 else
486 strcpy(path + len, ".py");
487
488 toc_entry = PyDict_GetItemString(self->files, path);
489 if (toc_entry != NULL)
490 return get_data(PyString_AsString(self->archive), toc_entry);
491
492 /* we have the module, but no source */
493 Py_INCREF(Py_None);
494 return Py_None;
495}
496
497PyDoc_STRVAR(doc_find_module,
498"find_module(fullname, path=None) -> self or None.\n\
499\n\
500Search for a module specified by 'fullname'. 'fullname' must be the\n\
501fully qualified (dotted) module name. It returns the zipimporter\n\
502instance itself if the module was found, or None if it wasn't.\n\
503The optional 'path' argument is ignored -- it's there for compatibility\n\
504with the importer protocol.");
505
506PyDoc_STRVAR(doc_load_module,
507"load_module(fullname) -> module.\n\
508\n\
509Load the module specified by 'fullname'. 'fullname' must be the\n\
510fully qualified (dotted) module name. It returns the imported\n\
511module, or raises ZipImportError if it wasn't found.");
512
513PyDoc_STRVAR(doc_get_data,
514"get_data(pathname) -> string with file data.\n\
515\n\
516Return the data associated with 'pathname'. Raise IOError if\n\
517the file wasn't found.");
518
519PyDoc_STRVAR(doc_is_package,
520"is_package(fullname) -> bool.\n\
521\n\
522Return True if the module specified by fullname is a package.\n\
523Raise ZipImportError is the module couldn't be found.");
524
525PyDoc_STRVAR(doc_get_code,
526"get_code(fullname) -> code object.\n\
527\n\
528Return the code object for the specified module. Raise ZipImportError\n\
529is the module couldn't be found.");
530
531PyDoc_STRVAR(doc_get_source,
532"get_source(fullname) -> source string.\n\
533\n\
534Return the source code for the specified module. Raise ZipImportError\n\
535is the module couldn't be found, return None if the archive does\n\
536contain the module, but has no source for it.");
537
538static PyMethodDef zipimporter_methods[] = {
539 {"find_module", zipimporter_find_module, METH_VARARGS,
540 doc_find_module},
541 {"load_module", zipimporter_load_module, METH_VARARGS,
542 doc_load_module},
543 {"get_data", zipimporter_get_data, METH_VARARGS,
544 doc_get_data},
545 {"get_code", zipimporter_get_code, METH_VARARGS,
546 doc_get_code},
547 {"get_source", zipimporter_get_source, METH_VARARGS,
548 doc_get_source},
549 {"is_package", zipimporter_is_package, METH_VARARGS,
550 doc_is_package},
551 {NULL, NULL} /* sentinel */
552};
553
554static PyMemberDef zipimporter_members[] = {
555 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
556 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
557 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
558 {NULL}
559};
560
561PyDoc_STRVAR(zipimporter_doc,
562"zipimporter(archivepath) -> zipimporter object\n\
563\n\
564Create a new zipimporter instance. 'archivepath' must be a path to\n\
565a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
566a valid Zip archive.");
567
568#define DEFERRED_ADDRESS(ADDR) 0
569
570static PyTypeObject ZipImporter_Type = {
571 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
572 0,
573 "zipimport.zipimporter",
574 sizeof(ZipImporter),
575 0, /* tp_itemsize */
576 (destructor)zipimporter_dealloc, /* tp_dealloc */
577 0, /* tp_print */
578 0, /* tp_getattr */
579 0, /* tp_setattr */
580 0, /* tp_compare */
581 (reprfunc)zipimporter_repr, /* tp_repr */
582 0, /* tp_as_number */
583 0, /* tp_as_sequence */
584 0, /* tp_as_mapping */
585 0, /* tp_hash */
586 0, /* tp_call */
587 0, /* tp_str */
588 PyObject_GenericGetAttr, /* tp_getattro */
589 0, /* tp_setattro */
590 0, /* tp_as_buffer */
591 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
592 Py_TPFLAGS_HAVE_GC, /* tp_flags */
593 zipimporter_doc, /* tp_doc */
594 zipimporter_traverse, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /* tp_weaklistoffset */
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 zipimporter_methods, /* tp_methods */
601 zipimporter_members, /* tp_members */
602 0, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)zipimporter_init, /* tp_init */
609 PyType_GenericAlloc, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611 PyObject_GC_Del, /* tp_free */
612};
613
614
615/* implementation */
616
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value. This partially reimplements marshal.c:r_long().

   The bytes are combined in unsigned arithmetic so that no bit is ever
   shifted into the sign bit of a signed type, and the sign is applied
   with plain subtraction, so the result does not depend on the width
   of 'long'. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    if (bits & 0x80000000UL) {
        /* bit 31 set: value is (low 31 bits) - 2**31 */
        return (long)(bits & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1;
    }
    return (long)bits;
}
633
634/*
635 read_directory(archive) -> files dict (new reference)
636
637 Given a path to a Zip archive, build a dict, mapping file names
638 (local to the archive, using SEP as a separator) to toc entries.
639
640 A toc_entry is a tuple:
641
Fred Drakef5b7fd22005-11-11 19:34:56 +0000642 (__file__, # value to use for __file__, available for all files
643 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000644 data_size, # size of compressed data on disk
645 file_size, # size of decompressed data
646 file_offset, # offset of file header from start of archive
647 time, # mod time of file (in dos format)
648 date, # mod data of file (in dos format)
649 crc, # crc checksum of the data
650 )
651
652 Directories can be recognized by the trailing SEP in the name,
653 data_size and file_offset are 0.
654*/
655static PyObject *
656read_directory(char *archive)
657{
658 PyObject *files = NULL;
659 FILE *fp;
660 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000661 long header_offset, name_size, header_size, header_position;
Just van Rossum52e14d62002-12-30 22:08:05 +0000662 long i, l, length, count;
663 char path[MAXPATHLEN + 5];
664 char name[MAXPATHLEN + 5];
665 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000666 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000667
668 if (strlen(archive) > MAXPATHLEN) {
669 PyErr_SetString(PyExc_OverflowError,
670 "Zip path name is too long");
671 return NULL;
672 }
673 strcpy(path, archive);
674
675 fp = fopen(archive, "rb");
676 if (fp == NULL) {
677 PyErr_Format(ZipImportError, "can't open Zip file: "
678 "'%.200s'", archive);
679 return NULL;
680 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000681 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000682 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000683 if (fread(endof_central_dir, 1, 22, fp) != 22) {
684 fclose(fp);
685 PyErr_Format(ZipImportError, "can't read Zip file: "
686 "'%.200s'", archive);
687 return NULL;
688 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000689 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000690 /* Bad: End of Central Dir signature */
691 fclose(fp);
692 PyErr_Format(ZipImportError, "not a Zip file: "
693 "'%.200s'", archive);
694 return NULL;
695 }
696
Thomas Heller354e3d92003-07-22 18:10:15 +0000697 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000698 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000699 arc_offset = header_position - header_offset - header_size;
700 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000701
702 files = PyDict_New();
703 if (files == NULL)
704 goto error;
705
706 length = (long)strlen(path);
707 path[length] = SEP;
708
709 /* Start of Central Directory */
710 count = 0;
711 for (;;) {
712 PyObject *t;
713 int err;
714
715 fseek(fp, header_offset, 0); /* Start of file header */
716 l = PyMarshal_ReadLongFromFile(fp);
717 if (l != 0x02014B50)
718 break; /* Bad: Central Dir File Header */
719 fseek(fp, header_offset + 10, 0);
720 compress = PyMarshal_ReadShortFromFile(fp);
721 time = PyMarshal_ReadShortFromFile(fp);
722 date = PyMarshal_ReadShortFromFile(fp);
723 crc = PyMarshal_ReadLongFromFile(fp);
724 data_size = PyMarshal_ReadLongFromFile(fp);
725 file_size = PyMarshal_ReadLongFromFile(fp);
726 name_size = PyMarshal_ReadShortFromFile(fp);
727 header_size = 46 + name_size +
728 PyMarshal_ReadShortFromFile(fp) +
729 PyMarshal_ReadShortFromFile(fp);
730 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000731 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000732 if (name_size > MAXPATHLEN)
733 name_size = MAXPATHLEN;
734
735 p = name;
736 for (i = 0; i < name_size; i++) {
737 *p = (char)getc(fp);
738 if (*p == '/')
739 *p = SEP;
740 p++;
741 }
742 *p = 0; /* Add terminating null byte */
743 header_offset += header_size;
744
745 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
746
747 t = Py_BuildValue("siiiiiii", path, compress, data_size,
748 file_size, file_offset, time, date, crc);
749 if (t == NULL)
750 goto error;
751 err = PyDict_SetItemString(files, name, t);
752 Py_DECREF(t);
753 if (err != 0)
754 goto error;
755 count++;
756 }
757 fclose(fp);
758 if (Py_VerboseFlag)
759 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
760 count, archive);
761 return files;
762error:
763 fclose(fp);
764 Py_XDECREF(files);
765 return NULL;
766}
767
768/* Return the zlib.decompress function object, or NULL if zlib couldn't
769 be imported. The function is cached when found, so subsequent calls
770 don't import zlib again. Returns a *borrowed* reference.
771 XXX This makes zlib.decompress immortal. */
772static PyObject *
773get_decompress_func(void)
774{
775 static PyObject *decompress = NULL;
776
777 if (decompress == NULL) {
778 PyObject *zlib;
779 static int importing_zlib = 0;
780
781 if (importing_zlib != 0)
782 /* Someone has a zlib.py[co] in their Zip file;
783 let's avoid a stack overflow. */
784 return NULL;
785 importing_zlib = 1;
786 zlib = PyImport_ImportModule("zlib"); /* import zlib */
787 importing_zlib = 0;
788 if (zlib != NULL) {
789 decompress = PyObject_GetAttrString(zlib,
790 "decompress");
791 Py_DECREF(zlib);
792 }
793 else
794 PyErr_Clear();
795 if (Py_VerboseFlag)
796 PySys_WriteStderr("# zipimport: zlib %s\n",
797 zlib != NULL ? "available": "UNAVAILABLE");
798 }
799 return decompress;
800}
801
802/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
803 data as a new reference. */
804static PyObject *
805get_data(char *archive, PyObject *toc_entry)
806{
807 PyObject *raw_data, *data = NULL, *decompress;
808 char *buf;
809 FILE *fp;
810 int err, bytes_read = 0;
811 long l;
812 char *datapath;
813 long compress, data_size, file_size, file_offset;
814 long time, date, crc;
815
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000816 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000817 &data_size, &file_size, &file_offset, &time,
818 &date, &crc)) {
819 return NULL;
820 }
821
822 fp = fopen(archive, "rb");
823 if (!fp) {
824 PyErr_Format(PyExc_IOError,
825 "zipimport: can not open file %s", archive);
826 return NULL;
827 }
828
829 /* Check to make sure the local file header is correct */
830 fseek(fp, file_offset, 0);
831 l = PyMarshal_ReadLongFromFile(fp);
832 if (l != 0x04034B50) {
833 /* Bad: Local File Header */
834 PyErr_Format(ZipImportError,
835 "bad local file header in %s",
836 archive);
837 fclose(fp);
838 return NULL;
839 }
840 fseek(fp, file_offset + 26, 0);
841 l = 30 + PyMarshal_ReadShortFromFile(fp) +
842 PyMarshal_ReadShortFromFile(fp); /* local header size */
843 file_offset += l; /* Start of file data */
844
845 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
846 data_size : data_size + 1);
847 if (raw_data == NULL) {
848 fclose(fp);
849 return NULL;
850 }
851 buf = PyString_AsString(raw_data);
852
853 err = fseek(fp, file_offset, 0);
854 if (err == 0)
855 bytes_read = fread(buf, 1, data_size, fp);
856 fclose(fp);
857 if (err || bytes_read != data_size) {
858 PyErr_SetString(PyExc_IOError,
859 "zipimport: can't read data");
860 Py_DECREF(raw_data);
861 return NULL;
862 }
863
864 if (compress != 0) {
865 buf[data_size] = 'Z'; /* saw this in zipfile.py */
866 data_size++;
867 }
868 buf[data_size] = '\0';
869
870 if (compress == 0) /* data is not compressed */
871 return raw_data;
872
873 /* Decompress with zlib */
874 decompress = get_decompress_func();
875 if (decompress == NULL) {
876 PyErr_SetString(ZipImportError,
877 "can't decompress data; "
878 "zlib not available");
879 goto error;
880 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000881 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000882error:
883 Py_DECREF(raw_data);
884 return data;
885}
886
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two time stamps are considered equal
   when they differ by at most one second. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
899
900/* Given the contents of a .py[co] file in a buffer, unmarshal the data
901 and return the code object. Return None if it the magic word doesn't
902 match (we do this instead of raising an exception as we fall back
903 to .py if available and we don't want to mask other errors).
904 Returns a new reference. */
905static PyObject *
906unmarshal_code(char *pathname, PyObject *data, time_t mtime)
907{
908 PyObject *code;
909 char *buf = PyString_AsString(data);
910 int size = PyString_Size(data);
911
912 if (size <= 9) {
913 PyErr_SetString(ZipImportError,
914 "bad pyc data");
915 return NULL;
916 }
917
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000918 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000919 if (Py_VerboseFlag)
920 PySys_WriteStderr("# %s has bad magic\n",
921 pathname);
922 Py_INCREF(Py_None);
923 return Py_None; /* signal caller to try alternative */
924 }
925
Just van Rossum9a3129c2003-01-03 11:18:56 +0000926 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
927 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000928 if (Py_VerboseFlag)
929 PySys_WriteStderr("# %s has bad mtime\n",
930 pathname);
931 Py_INCREF(Py_None);
932 return Py_None; /* signal caller to try alternative */
933 }
934
935 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
936 if (code == NULL)
937 return NULL;
938 if (!PyCode_Check(code)) {
939 Py_DECREF(code);
940 PyErr_Format(PyExc_TypeError,
941 "compiled module %.200s is not a code object",
942 pathname);
943 return NULL;
944 }
945 return code;
946}
947
948/* Replace any occurances of "\r\n?" in the input string with "\n".
949 This converts DOS and Mac line endings to Unix line endings.
950 Also append a trailing "\n" to be compatible with
951 PyParser_SimpleParseFile(). Returns a new reference. */
952static PyObject *
953normalize_line_endings(PyObject *source)
954{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000955 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000956 PyObject *fixed_source;
957
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 /* one char extra for trailing \n and one for terminating \0 */
959 buf = PyMem_Malloc(PyString_Size(source) + 2);
960 if (buf == NULL) {
961 PyErr_SetString(PyExc_MemoryError,
962 "zipimport: no memory to allocate "
963 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000965 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000966 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000967 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 if (*p == '\r') {
969 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000970 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000971 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 }
973 else
974 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000975 }
976 *q++ = '\n'; /* add trailing \n */
977 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000978 fixed_source = PyString_FromString(buf);
979 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000980 return fixed_source;
981}
982
983/* Given a string buffer containing Python source code, compile it
984 return and return a code object as a new reference. */
985static PyObject *
986compile_source(char *pathname, PyObject *source)
987{
988 PyObject *code, *fixed_source;
989
990 fixed_source = normalize_line_endings(source);
991 if (fixed_source == NULL)
992 return NULL;
993
994 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
995 Py_file_input);
996 Py_DECREF(fixed_source);
997 return code;
998}
999
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.  The
   packed DOS fields are unpacked into a struct tm, which mktime() then
   converts. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* date: bits 9-15 year since 1980, bits 5-8 month, bits 0-4 day */
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    stm.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_mday = dosdate & 0x1f;

    /* time: bits 11-15 hour, bits 5-10 minute, bits 0-4 seconds / 2 */
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_min  = (dostime >> 5) & 0x3f;
    stm.tm_sec  = (dostime & 0x1f) * 2;

    stm.tm_isdst = -1;          /* wday/yday is ignored */

    return mktime(&stm);
}
1017
1018/* Given a path to a .pyc or .pyo file in the archive, return the
1019 modifictaion time of the matching .py file, or 0 if no source
1020 is available. */
1021static time_t
1022get_mtime_of_source(ZipImporter *self, char *path)
1023{
1024 PyObject *toc_entry;
1025 time_t mtime = 0;
1026 int lastchar = strlen(path) - 1;
1027 char savechar = path[lastchar];
1028 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1029 toc_entry = PyDict_GetItemString(self->files, path);
1030 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1031 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001032 /* fetch the time stamp of the .py file for comparison
1033 with an embedded pyc time stamp */
1034 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001035 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1036 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1037 mtime = parse_dostime(time, date);
1038 }
1039 path[lastchar] = savechar;
1040 return mtime;
1041}
1042
1043/* Return the code object for the module named by 'fullname' from the
1044 Zip archive as a new reference. */
1045static PyObject *
1046get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1047 time_t mtime, PyObject *toc_entry)
1048{
1049 PyObject *data, *code;
1050 char *modpath;
1051 char *archive = PyString_AsString(self->archive);
1052
1053 if (archive == NULL)
1054 return NULL;
1055
1056 data = get_data(archive, toc_entry);
1057 if (data == NULL)
1058 return NULL;
1059
1060 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1061
1062 if (isbytecode) {
1063 code = unmarshal_code(modpath, data, mtime);
1064 }
1065 else {
1066 code = compile_source(modpath, data);
1067 }
1068 Py_DECREF(data);
1069 return code;
1070}
1071
1072/* Get the code object assoiciated with the module specified by
1073 'fullname'. */
1074static PyObject *
1075get_module_code(ZipImporter *self, char *fullname,
1076 int *p_ispackage, char **p_modpath)
1077{
1078 PyObject *toc_entry;
1079 char *subname, path[MAXPATHLEN + 1];
1080 int len;
1081 struct st_zip_searchorder *zso;
1082
1083 subname = get_subname(fullname);
1084
1085 len = make_filename(PyString_AsString(self->prefix), subname, path);
1086 if (len < 0)
1087 return NULL;
1088
1089 for (zso = zip_searchorder; *zso->suffix; zso++) {
1090 PyObject *code = NULL;
1091
1092 strcpy(path + len, zso->suffix);
1093 if (Py_VerboseFlag > 1)
1094 PySys_WriteStderr("# trying %s%c%s\n",
1095 PyString_AsString(self->archive),
1096 SEP, path);
1097 toc_entry = PyDict_GetItemString(self->files, path);
1098 if (toc_entry != NULL) {
1099 time_t mtime = 0;
1100 int ispackage = zso->type & IS_PACKAGE;
1101 int isbytecode = zso->type & IS_BYTECODE;
1102
1103 if (isbytecode)
1104 mtime = get_mtime_of_source(self, path);
1105 if (p_ispackage != NULL)
1106 *p_ispackage = ispackage;
1107 code = get_code_from_data(self, ispackage,
1108 isbytecode, mtime,
1109 toc_entry);
1110 if (code == Py_None) {
1111 /* bad magic number or non-matching mtime
1112 in byte code, try next */
1113 Py_DECREF(code);
1114 continue;
1115 }
1116 if (code != NULL && p_modpath != NULL)
1117 *p_modpath = PyString_AsString(
1118 PyTuple_GetItem(toc_entry, 0));
1119 return code;
1120 }
1121 }
1122 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1123 return NULL;
1124}
1125
1126
1127/* Module init */
1128
1129PyDoc_STRVAR(zipimport_doc,
1130"zipimport provides support for importing Python modules from Zip archives.\n\
1131\n\
1132This module exports three objects:\n\
1133- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1134- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1135 subclass of ImportError, so it can be caught as ImportError, too.\n\
1136- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1137 info dicts, as used in zipimporter._files.\n\
1138\n\
1139It is usually not needed to use the zipimport module explicitly; it is\n\
1140used by the builtin import mechanism for sys.path items that are paths\n\
1141to Zip archives.");
1142
1143PyMODINIT_FUNC
1144initzipimport(void)
1145{
1146 PyObject *mod;
1147
1148 if (PyType_Ready(&ZipImporter_Type) < 0)
1149 return;
1150
1151 /* Correct directory separator */
1152 zip_searchorder[0].suffix[0] = SEP;
1153 zip_searchorder[1].suffix[0] = SEP;
1154 zip_searchorder[2].suffix[0] = SEP;
1155 if (Py_OptimizeFlag) {
1156 /* Reverse *.pyc and *.pyo */
1157 struct st_zip_searchorder tmp;
1158 tmp = zip_searchorder[0];
1159 zip_searchorder[0] = zip_searchorder[1];
1160 zip_searchorder[1] = tmp;
1161 tmp = zip_searchorder[3];
1162 zip_searchorder[3] = zip_searchorder[4];
1163 zip_searchorder[4] = tmp;
1164 }
1165
1166 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1167 NULL, PYTHON_API_VERSION);
1168
1169 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1170 PyExc_ImportError, NULL);
1171 if (ZipImportError == NULL)
1172 return;
1173
1174 Py_INCREF(ZipImportError);
1175 if (PyModule_AddObject(mod, "ZipImportError",
1176 ZipImportError) < 0)
1177 return;
1178
1179 Py_INCREF(&ZipImporter_Type);
1180 if (PyModule_AddObject(mod, "zipimporter",
1181 (PyObject *)&ZipImporter_Type) < 0)
1182 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001183
Just van Rossum52e14d62002-12-30 22:08:05 +00001184 zip_directory_cache = PyDict_New();
1185 if (zip_directory_cache == NULL)
1186 return;
1187 Py_INCREF(zip_directory_cache);
1188 if (PyModule_AddObject(mod, "_zip_directory_cache",
1189 zip_directory_cache) < 0)
1190 return;
1191}