blob: 934367e82303d7e1ec18e0a38d24548131f5f3d5 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits combined in st_zip_searchorder.type.  IS_SOURCE is the
   absence of IS_BYTECODE, so it has no bit of its own. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One candidate file-name suffix together with the kind of entry
   (IS_* flags) that a match on this suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* fits the longest suffix, "/__init__.pyc", plus NUL */
    int type;           /* bitwise OR of the IS_* flags above */
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry whose suffix is empty. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}
};
31
/* zipimporter object definition and support */

typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter.  All three members are filled in
   by zipimporter_init() and released by zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};

static PyTypeObject ZipImporter_Type;
/* Exception type raised by this module for archive/import problems. */
static PyObject *ZipImportError;
/* Dict mapping an archive path to its files dict, so an archive's
   directory is only read once (see zipimporter_init()). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if 'op' is a zipimporter instance (or an instance of a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
55
56
57/* zipimporter.__init__
58 Split the "subdirectory" from the Zip archive path, lookup a matching
59 entry in sys.path_importer_cache, fetch the file directory from there
60 if found, or else read it from the archive. */
61static int
62zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63{
64 char *path, *p, *prefix, buf[MAXPATHLEN+2];
65 int len;
66
Georg Brandl02c42872005-08-26 06:42:30 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
69
Just van Rossum52e14d62002-12-30 22:08:05 +000070 if (!PyArg_ParseTuple(args, "s:zipimporter",
71 &path))
72 return -1;
73
74 len = strlen(path);
75 if (len == 0) {
76 PyErr_SetString(ZipImportError, "archive path is empty");
77 return -1;
78 }
79 if (len >= MAXPATHLEN) {
80 PyErr_SetString(ZipImportError,
81 "archive path too long");
82 return -1;
83 }
84 strcpy(buf, path);
85
86#ifdef ALTSEP
87 for (p = buf; *p; p++) {
88 if (*p == ALTSEP)
89 *p = SEP;
90 }
91#endif
92
93 path = NULL;
94 prefix = NULL;
95 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000096#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000097 struct stat statbuf;
98 int rv;
99
100 rv = stat(buf, &statbuf);
101 if (rv == 0) {
102 /* it exists */
103 if (S_ISREG(statbuf.st_mode))
104 /* it's a file */
105 path = buf;
106 break;
107 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000108#else
109 if (object_exists(buf)) {
110 /* it exists */
111 if (isfile(buf))
112 /* it's a file */
113 path = buf;
114 break;
115 }
116#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000118 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
124 prefix = p;
125 }
126 if (path != NULL) {
127 PyObject *files;
128 files = PyDict_GetItemString(zip_directory_cache, path);
129 if (files == NULL) {
130 files = read_directory(buf);
131 if (files == NULL)
132 return -1;
133 if (PyDict_SetItemString(zip_directory_cache, path,
134 files) != 0)
135 return -1;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140 }
141 else {
142 PyErr_SetString(ZipImportError, "not a Zip file");
143 return -1;
144 }
145
146 if (prefix == NULL)
147 prefix = "";
148 else {
149 prefix++;
150 len = strlen(prefix);
151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
155 }
156 }
157
158 self->archive = PyString_FromString(buf);
159 if (self->archive == NULL)
160 return -1;
161
162 self->prefix = PyString_FromString(prefix);
163 if (self->prefix == NULL)
164 return -1;
165
166 return 0;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173 ZipImporter *self = (ZipImporter *)obj;
174 int err;
175
176 if (self->files != NULL) {
177 err = visit(self->files, arg);
178 if (err)
179 return err;
180 }
181 return 0;
182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000189 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 Py_XDECREF(self->files);
191 self->ob_type->tp_free((PyObject *)self);
192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
197 char buf[500];
198 char *archive = "???";
199 char *prefix = "";
200
201 if (self->archive != NULL && PyString_Check(self->archive))
202 archive = PyString_AsString(self->archive);
203 if (self->prefix != NULL && PyString_Check(self->prefix))
204 prefix = PyString_AsString(self->prefix);
205 if (prefix != NULL && *prefix)
206 PyOS_snprintf(buf, sizeof(buf),
207 "<zipimporter object \"%.300s%c%.150s\">",
208 archive, SEP, prefix);
209 else
210 PyOS_snprintf(buf, sizeof(buf),
211 "<zipimporter object \"%.300s\">",
212 archive);
213 return PyString_FromString(buf);
214}
215
/* Return the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   nothing is copied or allocated. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
227
228/* Given a (sub)modulename, write the potential file path in the
229 archive (without extension) to the path buffer. Return the
230 length of the resulting string. */
231static int
232make_filename(char *prefix, char *name, char *path)
233{
234 int len;
235 char *p;
236
237 len = strlen(prefix);
238
239 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
240 if (len + strlen(name) + 13 >= MAXPATHLEN) {
241 PyErr_SetString(ZipImportError, "path too long");
242 return -1;
243 }
244
245 strcpy(path, prefix);
246 strcpy(path + len, name);
247 for (p = path + len; *p; p++) {
248 if (*p == '.')
249 *p = SEP;
250 }
251 len += strlen(name);
252 return len;
253}
254
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
261
262/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000263static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000264get_module_info(ZipImporter *self, char *fullname)
265{
266 char *subname, path[MAXPATHLEN + 1];
267 int len;
268 struct st_zip_searchorder *zso;
269
270 subname = get_subname(fullname);
271
272 len = make_filename(PyString_AsString(self->prefix), subname, path);
273 if (len < 0)
274 return MI_ERROR;
275
276 for (zso = zip_searchorder; *zso->suffix; zso++) {
277 strcpy(path + len, zso->suffix);
278 if (PyDict_GetItemString(self->files, path) != NULL) {
279 if (zso->type & IS_PACKAGE)
280 return MI_PACKAGE;
281 else
282 return MI_MODULE;
283 }
284 }
285 return MI_NOT_FOUND;
286}
287
288/* Check whether we can satisfy the import of the module named by
289 'fullname'. Return self if we can, None if we can't. */
290static PyObject *
291zipimporter_find_module(PyObject *obj, PyObject *args)
292{
293 ZipImporter *self = (ZipImporter *)obj;
294 PyObject *path = NULL;
295 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000296 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297
298 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
299 &fullname, &path))
300 return NULL;
301
302 mi = get_module_info(self, fullname);
303 if (mi == MI_ERROR)
304 return NULL;
305 if (mi == MI_NOT_FOUND) {
306 Py_INCREF(Py_None);
307 return Py_None;
308 }
309 Py_INCREF(self);
310 return (PyObject *)self;
311}
312
313/* Load and return the module named by 'fullname'. */
314static PyObject *
315zipimporter_load_module(PyObject *obj, PyObject *args)
316{
317 ZipImporter *self = (ZipImporter *)obj;
318 PyObject *code, *mod, *dict;
319 char *fullname, *modpath;
320 int ispackage;
321
322 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
323 &fullname))
324 return NULL;
325
326 code = get_module_code(self, fullname, &ispackage, &modpath);
327 if (code == NULL)
328 return NULL;
329
330 mod = PyImport_AddModule(fullname);
331 if (mod == NULL) {
332 Py_DECREF(code);
333 return NULL;
334 }
335 dict = PyModule_GetDict(mod);
336
337 /* mod.__loader__ = self */
338 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
339 goto error;
340
341 if (ispackage) {
342 /* add __path__ to the module *before* the code gets
343 executed */
344 PyObject *pkgpath, *fullpath;
345 char *prefix = PyString_AsString(self->prefix);
346 char *subname = get_subname(fullname);
347 int err;
348
349 fullpath = PyString_FromFormat("%s%c%s%s",
350 PyString_AsString(self->archive),
351 SEP,
352 *prefix ? prefix : "",
353 subname);
354 if (fullpath == NULL)
355 goto error;
356
357 pkgpath = Py_BuildValue("[O]", fullpath);
358 Py_DECREF(fullpath);
359 if (pkgpath == NULL)
360 goto error;
361 err = PyDict_SetItemString(dict, "__path__", pkgpath);
362 Py_DECREF(pkgpath);
363 if (err != 0)
364 goto error;
365 }
366 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
367 Py_DECREF(code);
368 if (Py_VerboseFlag)
369 PySys_WriteStderr("import %s # loaded from Zip %s\n",
370 fullname, modpath);
371 return mod;
372error:
373 Py_DECREF(code);
374 Py_DECREF(mod);
375 return NULL;
376}
377
378/* Return a bool signifying whether the module is a package or not. */
379static PyObject *
380zipimporter_is_package(PyObject *obj, PyObject *args)
381{
382 ZipImporter *self = (ZipImporter *)obj;
383 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000384 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000385
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000386 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000387 &fullname))
388 return NULL;
389
390 mi = get_module_info(self, fullname);
391 if (mi == MI_ERROR)
392 return NULL;
393 if (mi == MI_NOT_FOUND) {
394 PyErr_Format(ZipImportError, "can't find module '%.200s'",
395 fullname);
396 return NULL;
397 }
398 return PyBool_FromLong(mi == MI_PACKAGE);
399}
400
401static PyObject *
402zipimporter_get_data(PyObject *obj, PyObject *args)
403{
404 ZipImporter *self = (ZipImporter *)obj;
405 char *path;
406#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000407 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000408#endif
409 PyObject *toc_entry;
410 int len;
411
412 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413 return NULL;
414
415#ifdef ALTSEP
416 if (strlen(path) >= MAXPATHLEN) {
417 PyErr_SetString(ZipImportError, "path too long");
418 return NULL;
419 }
420 strcpy(buf, path);
421 for (p = buf; *p; p++) {
422 if (*p == ALTSEP)
423 *p = SEP;
424 }
425 path = buf;
426#endif
427 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000428 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000429 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
430 path[len] == SEP) {
431 path = path + len + 1;
432 }
433
434 toc_entry = PyDict_GetItemString(self->files, path);
435 if (toc_entry == NULL) {
436 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
437 path);
438 return NULL;
439 }
440 return get_data(PyString_AsString(self->archive), toc_entry);
441}
442
443static PyObject *
444zipimporter_get_code(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 char *fullname;
448
449 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
450 return NULL;
451
452 return get_module_code(self, fullname, NULL, NULL);
453}
454
455static PyObject *
456zipimporter_get_source(PyObject *obj, PyObject *args)
457{
458 ZipImporter *self = (ZipImporter *)obj;
459 PyObject *toc_entry;
460 char *fullname, *subname, path[MAXPATHLEN+1];
461 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000462 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000463
464 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
465 return NULL;
466
467 mi = get_module_info(self, fullname);
468 if (mi == MI_ERROR)
469 return NULL;
470 if (mi == MI_NOT_FOUND) {
471 PyErr_Format(ZipImportError, "can't find module '%.200s'",
472 fullname);
473 return NULL;
474 }
475 subname = get_subname(fullname);
476
477 len = make_filename(PyString_AsString(self->prefix), subname, path);
478 if (len < 0)
479 return NULL;
480
481 if (mi == MI_PACKAGE) {
482 path[len] = SEP;
483 strcpy(path + len + 1, "__init__.py");
484 }
485 else
486 strcpy(path + len, ".py");
487
488 toc_entry = PyDict_GetItemString(self->files, path);
489 if (toc_entry != NULL)
490 return get_data(PyString_AsString(self->archive), toc_entry);
491
492 /* we have the module, but no source */
493 Py_INCREF(Py_None);
494 return Py_None;
495}
496
497PyDoc_STRVAR(doc_find_module,
498"find_module(fullname, path=None) -> self or None.\n\
499\n\
500Search for a module specified by 'fullname'. 'fullname' must be the\n\
501fully qualified (dotted) module name. It returns the zipimporter\n\
502instance itself if the module was found, or None if it wasn't.\n\
503The optional 'path' argument is ignored -- it's there for compatibility\n\
504with the importer protocol.");
505
506PyDoc_STRVAR(doc_load_module,
507"load_module(fullname) -> module.\n\
508\n\
509Load the module specified by 'fullname'. 'fullname' must be the\n\
510fully qualified (dotted) module name. It returns the imported\n\
511module, or raises ZipImportError if it wasn't found.");
512
513PyDoc_STRVAR(doc_get_data,
514"get_data(pathname) -> string with file data.\n\
515\n\
516Return the data associated with 'pathname'. Raise IOError if\n\
517the file wasn't found.");
518
519PyDoc_STRVAR(doc_is_package,
520"is_package(fullname) -> bool.\n\
521\n\
522Return True if the module specified by fullname is a package.\n\
523Raise ZipImportError is the module couldn't be found.");
524
525PyDoc_STRVAR(doc_get_code,
526"get_code(fullname) -> code object.\n\
527\n\
528Return the code object for the specified module. Raise ZipImportError\n\
529is the module couldn't be found.");
530
531PyDoc_STRVAR(doc_get_source,
532"get_source(fullname) -> source string.\n\
533\n\
534Return the source code for the specified module. Raise ZipImportError\n\
535is the module couldn't be found, return None if the archive does\n\
536contain the module, but has no source for it.");
537
/* Method table of the zipimporter type; every method takes its
   arguments as a positional tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL, NULL} /* sentinel */
};
553
/* Read-only attributes exposing the three struct members; note that
   the files dict is published under the name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
    {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
    {NULL}  /* sentinel */
};
560
/* Docstring of the zipimporter type itself (used as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");
567
/* Expands to 0 whatever its argument is; the argument only documents
   which address is meant.
   NOTE(review): presumably the real type pointer is stored during
   module initialisation, which is not visible here -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter: a GC-enabled, subclassable
   type with a custom repr, the method and member tables above, and
   zipimporter_init as its initialiser. */
static PyTypeObject ZipImporter_Type = {
    PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
    0,
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                  /* tp_itemsize */
    (destructor)zipimporter_dealloc,    /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)zipimporter_repr,         /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,             /* tp_flags */
    zipimporter_doc,                    /* tp_doc */
    zipimporter_traverse,               /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    zipimporter_methods,                /* tp_methods */
    zipimporter_members,                /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)zipimporter_init,         /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_GC_Del,                    /* tp_free */
};
613
614
615/* implementation */
616
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit quantity. This partially reimplements marshal.c:r_long().

   The value is assembled in an unsigned long so that no bit is ever
   shifted into a sign position, and the sign is applied arithmetically
   so the result is the same whatever the width of long is. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* two's complement negative: -(~bits + 1), computed without
           ever leaving the range of a 32-bit long */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
633
634/*
635 read_directory(archive) -> files dict (new reference)
636
637 Given a path to a Zip archive, build a dict, mapping file names
638 (local to the archive, using SEP as a separator) to toc entries.
639
640 A toc_entry is a tuple:
641
642 (compress, # compression kind; 0 for uncompressed
643 data_size, # size of compressed data on disk
644 file_size, # size of decompressed data
645 file_offset, # offset of file header from start of archive
646 time, # mod time of file (in dos format)
647 date, # mod data of file (in dos format)
648 crc, # crc checksum of the data
649 )
650
651 Directories can be recognized by the trailing SEP in the name,
652 data_size and file_offset are 0.
653*/
654static PyObject *
655read_directory(char *archive)
656{
657 PyObject *files = NULL;
658 FILE *fp;
659 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000660 long header_offset, name_size, header_size, header_position;
Just van Rossum52e14d62002-12-30 22:08:05 +0000661 long i, l, length, count;
662 char path[MAXPATHLEN + 5];
663 char name[MAXPATHLEN + 5];
664 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000665 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000666
667 if (strlen(archive) > MAXPATHLEN) {
668 PyErr_SetString(PyExc_OverflowError,
669 "Zip path name is too long");
670 return NULL;
671 }
672 strcpy(path, archive);
673
674 fp = fopen(archive, "rb");
675 if (fp == NULL) {
676 PyErr_Format(ZipImportError, "can't open Zip file: "
677 "'%.200s'", archive);
678 return NULL;
679 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000680 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000681 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000682 if (fread(endof_central_dir, 1, 22, fp) != 22) {
683 fclose(fp);
684 PyErr_Format(ZipImportError, "can't read Zip file: "
685 "'%.200s'", archive);
686 return NULL;
687 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000688 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000689 /* Bad: End of Central Dir signature */
690 fclose(fp);
691 PyErr_Format(ZipImportError, "not a Zip file: "
692 "'%.200s'", archive);
693 return NULL;
694 }
695
Thomas Heller354e3d92003-07-22 18:10:15 +0000696 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000697 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000698 arc_offset = header_position - header_offset - header_size;
699 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000700
701 files = PyDict_New();
702 if (files == NULL)
703 goto error;
704
705 length = (long)strlen(path);
706 path[length] = SEP;
707
708 /* Start of Central Directory */
709 count = 0;
710 for (;;) {
711 PyObject *t;
712 int err;
713
714 fseek(fp, header_offset, 0); /* Start of file header */
715 l = PyMarshal_ReadLongFromFile(fp);
716 if (l != 0x02014B50)
717 break; /* Bad: Central Dir File Header */
718 fseek(fp, header_offset + 10, 0);
719 compress = PyMarshal_ReadShortFromFile(fp);
720 time = PyMarshal_ReadShortFromFile(fp);
721 date = PyMarshal_ReadShortFromFile(fp);
722 crc = PyMarshal_ReadLongFromFile(fp);
723 data_size = PyMarshal_ReadLongFromFile(fp);
724 file_size = PyMarshal_ReadLongFromFile(fp);
725 name_size = PyMarshal_ReadShortFromFile(fp);
726 header_size = 46 + name_size +
727 PyMarshal_ReadShortFromFile(fp) +
728 PyMarshal_ReadShortFromFile(fp);
729 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000730 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000731 if (name_size > MAXPATHLEN)
732 name_size = MAXPATHLEN;
733
734 p = name;
735 for (i = 0; i < name_size; i++) {
736 *p = (char)getc(fp);
737 if (*p == '/')
738 *p = SEP;
739 p++;
740 }
741 *p = 0; /* Add terminating null byte */
742 header_offset += header_size;
743
744 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
745
746 t = Py_BuildValue("siiiiiii", path, compress, data_size,
747 file_size, file_offset, time, date, crc);
748 if (t == NULL)
749 goto error;
750 err = PyDict_SetItemString(files, name, t);
751 Py_DECREF(t);
752 if (err != 0)
753 goto error;
754 count++;
755 }
756 fclose(fp);
757 if (Py_VerboseFlag)
758 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
759 count, archive);
760 return files;
761error:
762 fclose(fp);
763 Py_XDECREF(files);
764 return NULL;
765}
766
767/* Return the zlib.decompress function object, or NULL if zlib couldn't
768 be imported. The function is cached when found, so subsequent calls
769 don't import zlib again. Returns a *borrowed* reference.
770 XXX This makes zlib.decompress immortal. */
771static PyObject *
772get_decompress_func(void)
773{
774 static PyObject *decompress = NULL;
775
776 if (decompress == NULL) {
777 PyObject *zlib;
778 static int importing_zlib = 0;
779
780 if (importing_zlib != 0)
781 /* Someone has a zlib.py[co] in their Zip file;
782 let's avoid a stack overflow. */
783 return NULL;
784 importing_zlib = 1;
785 zlib = PyImport_ImportModule("zlib"); /* import zlib */
786 importing_zlib = 0;
787 if (zlib != NULL) {
788 decompress = PyObject_GetAttrString(zlib,
789 "decompress");
790 Py_DECREF(zlib);
791 }
792 else
793 PyErr_Clear();
794 if (Py_VerboseFlag)
795 PySys_WriteStderr("# zipimport: zlib %s\n",
796 zlib != NULL ? "available": "UNAVAILABLE");
797 }
798 return decompress;
799}
800
801/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
802 data as a new reference. */
803static PyObject *
804get_data(char *archive, PyObject *toc_entry)
805{
806 PyObject *raw_data, *data = NULL, *decompress;
807 char *buf;
808 FILE *fp;
809 int err, bytes_read = 0;
810 long l;
811 char *datapath;
812 long compress, data_size, file_size, file_offset;
813 long time, date, crc;
814
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000815 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000816 &data_size, &file_size, &file_offset, &time,
817 &date, &crc)) {
818 return NULL;
819 }
820
821 fp = fopen(archive, "rb");
822 if (!fp) {
823 PyErr_Format(PyExc_IOError,
824 "zipimport: can not open file %s", archive);
825 return NULL;
826 }
827
828 /* Check to make sure the local file header is correct */
829 fseek(fp, file_offset, 0);
830 l = PyMarshal_ReadLongFromFile(fp);
831 if (l != 0x04034B50) {
832 /* Bad: Local File Header */
833 PyErr_Format(ZipImportError,
834 "bad local file header in %s",
835 archive);
836 fclose(fp);
837 return NULL;
838 }
839 fseek(fp, file_offset + 26, 0);
840 l = 30 + PyMarshal_ReadShortFromFile(fp) +
841 PyMarshal_ReadShortFromFile(fp); /* local header size */
842 file_offset += l; /* Start of file data */
843
844 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
845 data_size : data_size + 1);
846 if (raw_data == NULL) {
847 fclose(fp);
848 return NULL;
849 }
850 buf = PyString_AsString(raw_data);
851
852 err = fseek(fp, file_offset, 0);
853 if (err == 0)
854 bytes_read = fread(buf, 1, data_size, fp);
855 fclose(fp);
856 if (err || bytes_read != data_size) {
857 PyErr_SetString(PyExc_IOError,
858 "zipimport: can't read data");
859 Py_DECREF(raw_data);
860 return NULL;
861 }
862
863 if (compress != 0) {
864 buf[data_size] = 'Z'; /* saw this in zipfile.py */
865 data_size++;
866 }
867 buf[data_size] = '\0';
868
869 if (compress == 0) /* data is not compressed */
870 return raw_data;
871
872 /* Decompress with zlib */
873 decompress = get_decompress_func();
874 if (decompress == NULL) {
875 PyErr_SetString(ZipImportError,
876 "can't decompress data; "
877 "zlib not available");
878 goto error;
879 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000880 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000881error:
882 Py_DECREF(raw_data);
883 return data;
884}
885
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.

   Returns 1 if the two time stamps differ by no more than one second,
   0 otherwise.  The larger value is always the minuend, so the result
   is right whether time_t is a signed or an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t gap = (t1 > t2) ? (t1 - t2) : (t2 - t1);

    /* dostime only stores even seconds, so be lenient */
    return gap <= 1;
}
898
899/* Given the contents of a .py[co] file in a buffer, unmarshal the data
900 and return the code object. Return None if it the magic word doesn't
901 match (we do this instead of raising an exception as we fall back
902 to .py if available and we don't want to mask other errors).
903 Returns a new reference. */
904static PyObject *
905unmarshal_code(char *pathname, PyObject *data, time_t mtime)
906{
907 PyObject *code;
908 char *buf = PyString_AsString(data);
909 int size = PyString_Size(data);
910
911 if (size <= 9) {
912 PyErr_SetString(ZipImportError,
913 "bad pyc data");
914 return NULL;
915 }
916
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000917 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000918 if (Py_VerboseFlag)
919 PySys_WriteStderr("# %s has bad magic\n",
920 pathname);
921 Py_INCREF(Py_None);
922 return Py_None; /* signal caller to try alternative */
923 }
924
Just van Rossum9a3129c2003-01-03 11:18:56 +0000925 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
926 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000927 if (Py_VerboseFlag)
928 PySys_WriteStderr("# %s has bad mtime\n",
929 pathname);
930 Py_INCREF(Py_None);
931 return Py_None; /* signal caller to try alternative */
932 }
933
934 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
935 if (code == NULL)
936 return NULL;
937 if (!PyCode_Check(code)) {
938 Py_DECREF(code);
939 PyErr_Format(PyExc_TypeError,
940 "compiled module %.200s is not a code object",
941 pathname);
942 return NULL;
943 }
944 return code;
945}
946
947/* Replace any occurances of "\r\n?" in the input string with "\n".
948 This converts DOS and Mac line endings to Unix line endings.
949 Also append a trailing "\n" to be compatible with
950 PyParser_SimpleParseFile(). Returns a new reference. */
951static PyObject *
952normalize_line_endings(PyObject *source)
953{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000954 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000955 PyObject *fixed_source;
956
Just van Rossum9a3129c2003-01-03 11:18:56 +0000957 /* one char extra for trailing \n and one for terminating \0 */
958 buf = PyMem_Malloc(PyString_Size(source) + 2);
959 if (buf == NULL) {
960 PyErr_SetString(PyExc_MemoryError,
961 "zipimport: no memory to allocate "
962 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000964 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000966 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 if (*p == '\r') {
968 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000969 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000970 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000971 }
972 else
973 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000974 }
975 *q++ = '\n'; /* add trailing \n */
976 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000977 fixed_source = PyString_FromString(buf);
978 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000979 return fixed_source;
980}
981
982/* Given a string buffer containing Python source code, compile it
983 return and return a code object as a new reference. */
984static PyObject *
985compile_source(char *pathname, PyObject *source)
986{
987 PyObject *code, *fixed_source;
988
989 fixed_source = normalize_line_endings(source);
990 if (fixed_source == NULL)
991 return NULL;
992
993 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
994 Py_file_input);
995 Py_DECREF(fixed_source);
996 return code;
997}
998
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' packs seconds/2 (bits 0-4), minutes (bits 5-10) and hours
   (bits 11-15); 'dosdate' packs the day (bits 0-4), the month
   (bits 5-8) and the year counted from 1980 (bits 9-15).  The fields
   are taken as local time and converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* Start from an all-zero struct so that no member, including any
       platform-specific extra ones, reaches mktime() uninitialised. */
    struct tm stm = {0};

    stm.tm_sec   =  (dostime        & 0x1f) * 2;
    stm.tm_min   =  (dostime >> 5)  & 0x3f;
    stm.tm_hour  =  (dostime >> 11) & 0x1f;
    stm.tm_mday  =   dosdate        & 0x1f;
    stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    stm.tm_isdst =   -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1016
1017/* Given a path to a .pyc or .pyo file in the archive, return the
1018 modifictaion time of the matching .py file, or 0 if no source
1019 is available. */
1020static time_t
1021get_mtime_of_source(ZipImporter *self, char *path)
1022{
1023 PyObject *toc_entry;
1024 time_t mtime = 0;
1025 int lastchar = strlen(path) - 1;
1026 char savechar = path[lastchar];
1027 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1028 toc_entry = PyDict_GetItemString(self->files, path);
1029 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1030 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001031 /* fetch the time stamp of the .py file for comparison
1032 with an embedded pyc time stamp */
1033 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001034 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1035 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1036 mtime = parse_dostime(time, date);
1037 }
1038 path[lastchar] = savechar;
1039 return mtime;
1040}
1041
1042/* Return the code object for the module named by 'fullname' from the
1043 Zip archive as a new reference. */
1044static PyObject *
1045get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1046 time_t mtime, PyObject *toc_entry)
1047{
1048 PyObject *data, *code;
1049 char *modpath;
1050 char *archive = PyString_AsString(self->archive);
1051
1052 if (archive == NULL)
1053 return NULL;
1054
1055 data = get_data(archive, toc_entry);
1056 if (data == NULL)
1057 return NULL;
1058
1059 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1060
1061 if (isbytecode) {
1062 code = unmarshal_code(modpath, data, mtime);
1063 }
1064 else {
1065 code = compile_source(modpath, data);
1066 }
1067 Py_DECREF(data);
1068 return code;
1069}
1070
1071/* Get the code object assoiciated with the module specified by
1072 'fullname'. */
1073static PyObject *
1074get_module_code(ZipImporter *self, char *fullname,
1075 int *p_ispackage, char **p_modpath)
1076{
1077 PyObject *toc_entry;
1078 char *subname, path[MAXPATHLEN + 1];
1079 int len;
1080 struct st_zip_searchorder *zso;
1081
1082 subname = get_subname(fullname);
1083
1084 len = make_filename(PyString_AsString(self->prefix), subname, path);
1085 if (len < 0)
1086 return NULL;
1087
1088 for (zso = zip_searchorder; *zso->suffix; zso++) {
1089 PyObject *code = NULL;
1090
1091 strcpy(path + len, zso->suffix);
1092 if (Py_VerboseFlag > 1)
1093 PySys_WriteStderr("# trying %s%c%s\n",
1094 PyString_AsString(self->archive),
1095 SEP, path);
1096 toc_entry = PyDict_GetItemString(self->files, path);
1097 if (toc_entry != NULL) {
1098 time_t mtime = 0;
1099 int ispackage = zso->type & IS_PACKAGE;
1100 int isbytecode = zso->type & IS_BYTECODE;
1101
1102 if (isbytecode)
1103 mtime = get_mtime_of_source(self, path);
1104 if (p_ispackage != NULL)
1105 *p_ispackage = ispackage;
1106 code = get_code_from_data(self, ispackage,
1107 isbytecode, mtime,
1108 toc_entry);
1109 if (code == Py_None) {
1110 /* bad magic number or non-matching mtime
1111 in byte code, try next */
1112 Py_DECREF(code);
1113 continue;
1114 }
1115 if (code != NULL && p_modpath != NULL)
1116 *p_modpath = PyString_AsString(
1117 PyTuple_GetItem(toc_entry, 0));
1118 return code;
1119 }
1120 }
1121 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1122 return NULL;
1123}
1124
1125
1126/* Module init */
1127
1128PyDoc_STRVAR(zipimport_doc,
1129"zipimport provides support for importing Python modules from Zip archives.\n\
1130\n\
1131This module exports three objects:\n\
1132- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1133- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1134 subclass of ImportError, so it can be caught as ImportError, too.\n\
1135- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1136 info dicts, as used in zipimporter._files.\n\
1137\n\
1138It is usually not needed to use the zipimport module explicitly; it is\n\
1139used by the builtin import mechanism for sys.path items that are paths\n\
1140to Zip archives.");
1141
1142PyMODINIT_FUNC
1143initzipimport(void)
1144{
1145 PyObject *mod;
1146
1147 if (PyType_Ready(&ZipImporter_Type) < 0)
1148 return;
1149
1150 /* Correct directory separator */
1151 zip_searchorder[0].suffix[0] = SEP;
1152 zip_searchorder[1].suffix[0] = SEP;
1153 zip_searchorder[2].suffix[0] = SEP;
1154 if (Py_OptimizeFlag) {
1155 /* Reverse *.pyc and *.pyo */
1156 struct st_zip_searchorder tmp;
1157 tmp = zip_searchorder[0];
1158 zip_searchorder[0] = zip_searchorder[1];
1159 zip_searchorder[1] = tmp;
1160 tmp = zip_searchorder[3];
1161 zip_searchorder[3] = zip_searchorder[4];
1162 zip_searchorder[4] = tmp;
1163 }
1164
1165 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1166 NULL, PYTHON_API_VERSION);
1167
1168 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1169 PyExc_ImportError, NULL);
1170 if (ZipImportError == NULL)
1171 return;
1172
1173 Py_INCREF(ZipImportError);
1174 if (PyModule_AddObject(mod, "ZipImportError",
1175 ZipImportError) < 0)
1176 return;
1177
1178 Py_INCREF(&ZipImporter_Type);
1179 if (PyModule_AddObject(mod, "zipimporter",
1180 (PyObject *)&ZipImporter_Type) < 0)
1181 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001182
Just van Rossum52e14d62002-12-30 22:08:05 +00001183 zip_directory_cache = PyDict_New();
1184 if (zip_directory_cache == NULL)
1185 return;
1186 Py_INCREF(zip_directory_cache);
1187 if (PyModule_AddObject(mod, "_zip_directory_cache",
1188 zip_directory_cache) < 0)
1189 return;
1190}