blob: 9f7da72d31cc91392c16930bc9de773a007e1d3c [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags describing a zip_searchorder entry; they are OR-ed together
   in the 'type' field of struct st_zip_searchorder. */
#define IS_SOURCE 0x0   /* plain source entry: no bit set */
#define IS_BYTECODE 0x1 /* entry holds compiled bytecode */
#define IS_PACKAGE 0x2  /* entry is a package's __init__ file */
12
/* One row of the module search table: a file name suffix plus the IS_*
   flags that describe what kind of entry the suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* OR-ed IS_* flags */
};
17
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table ends with an entry whose suffix is empty; the lookup loops
   in this file stop when they reach it. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel: empty suffix terminates the table */
};
32
33/* zipimporter object definition and support */
34
typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter object.  All three members are filled
   in by zipimporter_init() and released by zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive; /* pathname of the Zip archive */
    PyObject *prefix; /* file prefix: "a/sub/directory/" */
    PyObject *files; /* dict with file info {path: toc_entry} */
};
43
/* The zipimporter type object; it is defined after the method code. */
static PyTypeObject ZipImporter_Type;
/* Exception class used for zipimport failures; created in
   initzipimport(). */
static PyObject *ZipImportError;
/* Dict mapping an archive path to its files dict (see read_directory());
   created in initzipimport(). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True when 'op' is a zipimporter instance or an instance of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
68 if (!PyArg_ParseTuple(args, "s:zipimporter",
69 &path))
70 return -1;
71
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
76 }
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
81 }
82 strcpy(buf, path);
83
84#ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
88 }
89#endif
90
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
94 struct stat statbuf;
95 int rv;
96
97 rv = stat(buf, &statbuf);
98 if (rv == 0) {
99 /* it exists */
100 if (S_ISREG(statbuf.st_mode))
101 /* it's a file */
102 path = buf;
103 break;
104 }
105 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000106 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000107 if (prefix != NULL)
108 *prefix = SEP;
109 if (p == NULL)
110 break;
111 *p = '\0';
112 prefix = p;
113 }
114 if (path != NULL) {
115 PyObject *files;
116 files = PyDict_GetItemString(zip_directory_cache, path);
117 if (files == NULL) {
118 files = read_directory(buf);
119 if (files == NULL)
120 return -1;
121 if (PyDict_SetItemString(zip_directory_cache, path,
122 files) != 0)
123 return -1;
124 }
125 else
126 Py_INCREF(files);
127 self->files = files;
128 }
129 else {
130 PyErr_SetString(ZipImportError, "not a Zip file");
131 return -1;
132 }
133
134 if (prefix == NULL)
135 prefix = "";
136 else {
137 prefix++;
138 len = strlen(prefix);
139 if (prefix[len-1] != SEP) {
140 /* add trailing SEP */
141 prefix[len] = SEP;
142 prefix[len + 1] = '\0';
143 }
144 }
145
146 self->archive = PyString_FromString(buf);
147 if (self->archive == NULL)
148 return -1;
149
150 self->prefix = PyString_FromString(prefix);
151 if (self->prefix == NULL)
152 return -1;
153
154 return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161 ZipImporter *self = (ZipImporter *)obj;
162 int err;
163
164 if (self->files != NULL) {
165 err = visit(self->files, arg);
166 if (err)
167 return err;
168 }
169 return 0;
170}
171
172static void
173zipimporter_dealloc(ZipImporter *self)
174{
175 PyObject_GC_UnTrack(self);
176 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000177 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000178 Py_XDECREF(self->files);
179 self->ob_type->tp_free((PyObject *)self);
180}
181
182static PyObject *
183zipimporter_repr(ZipImporter *self)
184{
185 char buf[500];
186 char *archive = "???";
187 char *prefix = "";
188
189 if (self->archive != NULL && PyString_Check(self->archive))
190 archive = PyString_AsString(self->archive);
191 if (self->prefix != NULL && PyString_Check(self->prefix))
192 prefix = PyString_AsString(self->prefix);
193 if (prefix != NULL && *prefix)
194 PyOS_snprintf(buf, sizeof(buf),
195 "<zipimporter object \"%.300s%c%.150s\">",
196 archive, SEP, prefix);
197 else
198 PyOS_snprintf(buf, sizeof(buf),
199 "<zipimporter object \"%.300s\">",
200 archive);
201 return PyString_FromString(buf);
202}
203
/* Equivalent of fullname.split(".")[-1]: return a pointer to the part of
   'fullname' that follows its last '.', or 'fullname' itself when it
   contains no dot.  The result points into the caller's buffer. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217 archive (without extension) to the path buffer. Return the
218 length of the resulting string. */
219static int
220make_filename(char *prefix, char *name, char *path)
221{
222 int len;
223 char *p;
224
225 len = strlen(prefix);
226
227 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
228 if (len + strlen(name) + 13 >= MAXPATHLEN) {
229 PyErr_SetString(ZipImportError, "path too long");
230 return -1;
231 }
232
233 strcpy(path, prefix);
234 strcpy(path + len, name);
235 for (p = path + len; *p; p++) {
236 if (*p == '.')
237 *p = SEP;
238 }
239 len += strlen(name);
240 return len;
241}
242
/* Result of get_module_info(). */
enum module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
249
250/* Return some information about a module. */
251static enum module_info
252get_module_info(ZipImporter *self, char *fullname)
253{
254 char *subname, path[MAXPATHLEN + 1];
255 int len;
256 struct st_zip_searchorder *zso;
257
258 subname = get_subname(fullname);
259
260 len = make_filename(PyString_AsString(self->prefix), subname, path);
261 if (len < 0)
262 return MI_ERROR;
263
264 for (zso = zip_searchorder; *zso->suffix; zso++) {
265 strcpy(path + len, zso->suffix);
266 if (PyDict_GetItemString(self->files, path) != NULL) {
267 if (zso->type & IS_PACKAGE)
268 return MI_PACKAGE;
269 else
270 return MI_MODULE;
271 }
272 }
273 return MI_NOT_FOUND;
274}
275
276/* Check whether we can satisfy the import of the module named by
277 'fullname'. Return self if we can, None if we can't. */
278static PyObject *
279zipimporter_find_module(PyObject *obj, PyObject *args)
280{
281 ZipImporter *self = (ZipImporter *)obj;
282 PyObject *path = NULL;
283 char *fullname;
284 enum module_info mi;
285
286 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
287 &fullname, &path))
288 return NULL;
289
290 mi = get_module_info(self, fullname);
291 if (mi == MI_ERROR)
292 return NULL;
293 if (mi == MI_NOT_FOUND) {
294 Py_INCREF(Py_None);
295 return Py_None;
296 }
297 Py_INCREF(self);
298 return (PyObject *)self;
299}
300
301/* Load and return the module named by 'fullname'. */
302static PyObject *
303zipimporter_load_module(PyObject *obj, PyObject *args)
304{
305 ZipImporter *self = (ZipImporter *)obj;
306 PyObject *code, *mod, *dict;
307 char *fullname, *modpath;
308 int ispackage;
309
310 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
311 &fullname))
312 return NULL;
313
314 code = get_module_code(self, fullname, &ispackage, &modpath);
315 if (code == NULL)
316 return NULL;
317
318 mod = PyImport_AddModule(fullname);
319 if (mod == NULL) {
320 Py_DECREF(code);
321 return NULL;
322 }
323 dict = PyModule_GetDict(mod);
324
325 /* mod.__loader__ = self */
326 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
327 goto error;
328
329 if (ispackage) {
330 /* add __path__ to the module *before* the code gets
331 executed */
332 PyObject *pkgpath, *fullpath;
333 char *prefix = PyString_AsString(self->prefix);
334 char *subname = get_subname(fullname);
335 int err;
336
337 fullpath = PyString_FromFormat("%s%c%s%s",
338 PyString_AsString(self->archive),
339 SEP,
340 *prefix ? prefix : "",
341 subname);
342 if (fullpath == NULL)
343 goto error;
344
345 pkgpath = Py_BuildValue("[O]", fullpath);
346 Py_DECREF(fullpath);
347 if (pkgpath == NULL)
348 goto error;
349 err = PyDict_SetItemString(dict, "__path__", pkgpath);
350 Py_DECREF(pkgpath);
351 if (err != 0)
352 goto error;
353 }
354 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355 Py_DECREF(code);
356 if (Py_VerboseFlag)
357 PySys_WriteStderr("import %s # loaded from Zip %s\n",
358 fullname, modpath);
359 return mod;
360error:
361 Py_DECREF(code);
362 Py_DECREF(mod);
363 return NULL;
364}
365
366/* Return a bool signifying whether the module is a package or not. */
367static PyObject *
368zipimporter_is_package(PyObject *obj, PyObject *args)
369{
370 ZipImporter *self = (ZipImporter *)obj;
371 char *fullname;
372 enum module_info mi;
373
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000374 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000375 &fullname))
376 return NULL;
377
378 mi = get_module_info(self, fullname);
379 if (mi == MI_ERROR)
380 return NULL;
381 if (mi == MI_NOT_FOUND) {
382 PyErr_Format(ZipImportError, "can't find module '%.200s'",
383 fullname);
384 return NULL;
385 }
386 return PyBool_FromLong(mi == MI_PACKAGE);
387}
388
389static PyObject *
390zipimporter_get_data(PyObject *obj, PyObject *args)
391{
392 ZipImporter *self = (ZipImporter *)obj;
393 char *path;
394#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000395 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000396#endif
397 PyObject *toc_entry;
398 int len;
399
400 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
401 return NULL;
402
403#ifdef ALTSEP
404 if (strlen(path) >= MAXPATHLEN) {
405 PyErr_SetString(ZipImportError, "path too long");
406 return NULL;
407 }
408 strcpy(buf, path);
409 for (p = buf; *p; p++) {
410 if (*p == ALTSEP)
411 *p = SEP;
412 }
413 path = buf;
414#endif
415 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000416 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000417 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
418 path[len] == SEP) {
419 path = path + len + 1;
420 }
421
422 toc_entry = PyDict_GetItemString(self->files, path);
423 if (toc_entry == NULL) {
424 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
425 path);
426 return NULL;
427 }
428 return get_data(PyString_AsString(self->archive), toc_entry);
429}
430
431static PyObject *
432zipimporter_get_code(PyObject *obj, PyObject *args)
433{
434 ZipImporter *self = (ZipImporter *)obj;
435 char *fullname;
436
437 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
438 return NULL;
439
440 return get_module_code(self, fullname, NULL, NULL);
441}
442
443static PyObject *
444zipimporter_get_source(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 PyObject *toc_entry;
448 char *fullname, *subname, path[MAXPATHLEN+1];
449 int len;
450 enum module_info mi;
451
452 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
453 return NULL;
454
455 mi = get_module_info(self, fullname);
456 if (mi == MI_ERROR)
457 return NULL;
458 if (mi == MI_NOT_FOUND) {
459 PyErr_Format(ZipImportError, "can't find module '%.200s'",
460 fullname);
461 return NULL;
462 }
463 subname = get_subname(fullname);
464
465 len = make_filename(PyString_AsString(self->prefix), subname, path);
466 if (len < 0)
467 return NULL;
468
469 if (mi == MI_PACKAGE) {
470 path[len] = SEP;
471 strcpy(path + len + 1, "__init__.py");
472 }
473 else
474 strcpy(path + len, ".py");
475
476 toc_entry = PyDict_GetItemString(self->files, path);
477 if (toc_entry != NULL)
478 return get_data(PyString_AsString(self->archive), toc_entry);
479
480 /* we have the module, but no source */
481 Py_INCREF(Py_None);
482 return Py_None;
483}
484
485PyDoc_STRVAR(doc_find_module,
486"find_module(fullname, path=None) -> self or None.\n\
487\n\
488Search for a module specified by 'fullname'. 'fullname' must be the\n\
489fully qualified (dotted) module name. It returns the zipimporter\n\
490instance itself if the module was found, or None if it wasn't.\n\
491The optional 'path' argument is ignored -- it's there for compatibility\n\
492with the importer protocol.");
493
494PyDoc_STRVAR(doc_load_module,
495"load_module(fullname) -> module.\n\
496\n\
497Load the module specified by 'fullname'. 'fullname' must be the\n\
498fully qualified (dotted) module name. It returns the imported\n\
499module, or raises ZipImportError if it wasn't found.");
500
501PyDoc_STRVAR(doc_get_data,
502"get_data(pathname) -> string with file data.\n\
503\n\
504Return the data associated with 'pathname'. Raise IOError if\n\
505the file wasn't found.");
506
507PyDoc_STRVAR(doc_is_package,
508"is_package(fullname) -> bool.\n\
509\n\
510Return True if the module specified by fullname is a package.\n\
511Raise ZipImportError is the module couldn't be found.");
512
513PyDoc_STRVAR(doc_get_code,
514"get_code(fullname) -> code object.\n\
515\n\
516Return the code object for the specified module. Raise ZipImportError\n\
517is the module couldn't be found.");
518
519PyDoc_STRVAR(doc_get_source,
520"get_source(fullname) -> source string.\n\
521\n\
522Return the source code for the specified module. Raise ZipImportError\n\
523is the module couldn't be found, return None if the archive does\n\
524contain the module, but has no source for it.");
525
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring for each method. */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL, NULL} /* sentinel */
};
541
/* Read-only attributes exposing the three object members to Python;
   the files dict is published under the name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
    {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
    {NULL}  /* sentinel */
};
548
/* Docstring of the zipimporter type (installed as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");
555
/* Expands to 0 whatever its argument is; the argument only documents
   which address is meant.  NOTE(review): the real type pointer is
   presumably filled in by PyType_Ready() in initzipimport() -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimporter.  Instances are GC-tracked and the type
   may be subclassed (see tp_flags). */
static PyTypeObject ZipImporter_Type = {
    PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
    0,                                      /* ob_size */
    "zipimport.zipimporter",                /* tp_name */
    sizeof(ZipImporter),                    /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)zipimporter_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    (reprfunc)zipimporter_repr, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
    Py_TPFLAGS_HAVE_GC, /* tp_flags */
    zipimporter_doc, /* tp_doc */
    zipimporter_traverse, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    zipimporter_methods, /* tp_methods */
    zipimporter_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc)zipimporter_init, /* tp_init */
    PyType_GenericAlloc, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
    PyObject_GC_Del, /* tp_free */
};
601
602
603/* implementation */
604
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   two's-complement value. This partially reimplements
   marshal.c:r_long().

   The bytes are assembled in an unsigned long, so no shift ever reaches
   a sign bit, and the sign is decoded arithmetically, so the result is
   the same whether long is 32 or 64 bits wide. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* negative: value = bits - 2**32 = -((~bits & 0x7FFFFFFF) + 1) */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
621
622/*
623 read_directory(archive) -> files dict (new reference)
624
625 Given a path to a Zip archive, build a dict, mapping file names
626 (local to the archive, using SEP as a separator) to toc entries.
627
628 A toc_entry is a tuple:
629
630 (compress, # compression kind; 0 for uncompressed
631 data_size, # size of compressed data on disk
632 file_size, # size of decompressed data
633 file_offset, # offset of file header from start of archive
634 time, # mod time of file (in dos format)
635 date, # mod data of file (in dos format)
636 crc, # crc checksum of the data
637 )
638
639 Directories can be recognized by the trailing SEP in the name,
640 data_size and file_offset are 0.
641*/
642static PyObject *
643read_directory(char *archive)
644{
645 PyObject *files = NULL;
646 FILE *fp;
647 long compress, crc, data_size, file_size, file_offset, date, time;
648 long header_offset, name_size, header_size, header_end;
649 long i, l, length, count;
650 char path[MAXPATHLEN + 5];
651 char name[MAXPATHLEN + 5];
652 char *p, endof_central_dir[22];
653
654 if (strlen(archive) > MAXPATHLEN) {
655 PyErr_SetString(PyExc_OverflowError,
656 "Zip path name is too long");
657 return NULL;
658 }
659 strcpy(path, archive);
660
661 fp = fopen(archive, "rb");
662 if (fp == NULL) {
663 PyErr_Format(ZipImportError, "can't open Zip file: "
664 "'%.200s'", archive);
665 return NULL;
666 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000667 fseek(fp, -22, SEEK_END);
Just van Rossum52e14d62002-12-30 22:08:05 +0000668 header_end = ftell(fp);
669 if (fread(endof_central_dir, 1, 22, fp) != 22) {
670 fclose(fp);
671 PyErr_Format(ZipImportError, "can't read Zip file: "
672 "'%.200s'", archive);
673 return NULL;
674 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000675 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000676 /* Bad: End of Central Dir signature */
677 fclose(fp);
678 PyErr_Format(ZipImportError, "not a Zip file: "
679 "'%.200s'", archive);
680 return NULL;
681 }
682
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000683 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Just van Rossum52e14d62002-12-30 22:08:05 +0000684
685 files = PyDict_New();
686 if (files == NULL)
687 goto error;
688
689 length = (long)strlen(path);
690 path[length] = SEP;
691
692 /* Start of Central Directory */
693 count = 0;
694 for (;;) {
695 PyObject *t;
696 int err;
697
698 fseek(fp, header_offset, 0); /* Start of file header */
699 l = PyMarshal_ReadLongFromFile(fp);
700 if (l != 0x02014B50)
701 break; /* Bad: Central Dir File Header */
702 fseek(fp, header_offset + 10, 0);
703 compress = PyMarshal_ReadShortFromFile(fp);
704 time = PyMarshal_ReadShortFromFile(fp);
705 date = PyMarshal_ReadShortFromFile(fp);
706 crc = PyMarshal_ReadLongFromFile(fp);
707 data_size = PyMarshal_ReadLongFromFile(fp);
708 file_size = PyMarshal_ReadLongFromFile(fp);
709 name_size = PyMarshal_ReadShortFromFile(fp);
710 header_size = 46 + name_size +
711 PyMarshal_ReadShortFromFile(fp) +
712 PyMarshal_ReadShortFromFile(fp);
713 fseek(fp, header_offset + 42, 0);
714 file_offset = PyMarshal_ReadLongFromFile(fp);
715 if (name_size > MAXPATHLEN)
716 name_size = MAXPATHLEN;
717
718 p = name;
719 for (i = 0; i < name_size; i++) {
720 *p = (char)getc(fp);
721 if (*p == '/')
722 *p = SEP;
723 p++;
724 }
725 *p = 0; /* Add terminating null byte */
726 header_offset += header_size;
727
728 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730 t = Py_BuildValue("siiiiiii", path, compress, data_size,
731 file_size, file_offset, time, date, crc);
732 if (t == NULL)
733 goto error;
734 err = PyDict_SetItemString(files, name, t);
735 Py_DECREF(t);
736 if (err != 0)
737 goto error;
738 count++;
739 }
740 fclose(fp);
741 if (Py_VerboseFlag)
742 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743 count, archive);
744 return files;
745error:
746 fclose(fp);
747 Py_XDECREF(files);
748 return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752 be imported. The function is cached when found, so subsequent calls
753 don't import zlib again. Returns a *borrowed* reference.
754 XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758 static PyObject *decompress = NULL;
759
760 if (decompress == NULL) {
761 PyObject *zlib;
762 static int importing_zlib = 0;
763
764 if (importing_zlib != 0)
765 /* Someone has a zlib.py[co] in their Zip file;
766 let's avoid a stack overflow. */
767 return NULL;
768 importing_zlib = 1;
769 zlib = PyImport_ImportModule("zlib"); /* import zlib */
770 importing_zlib = 0;
771 if (zlib != NULL) {
772 decompress = PyObject_GetAttrString(zlib,
773 "decompress");
774 Py_DECREF(zlib);
775 }
776 else
777 PyErr_Clear();
778 if (Py_VerboseFlag)
779 PySys_WriteStderr("# zipimport: zlib %s\n",
780 zlib != NULL ? "available": "UNAVAILABLE");
781 }
782 return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786 data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790 PyObject *raw_data, *data = NULL, *decompress;
791 char *buf;
792 FILE *fp;
793 int err, bytes_read = 0;
794 long l;
795 char *datapath;
796 long compress, data_size, file_size, file_offset;
797 long time, date, crc;
798
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000799 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000800 &data_size, &file_size, &file_offset, &time,
801 &date, &crc)) {
802 return NULL;
803 }
804
805 fp = fopen(archive, "rb");
806 if (!fp) {
807 PyErr_Format(PyExc_IOError,
808 "zipimport: can not open file %s", archive);
809 return NULL;
810 }
811
812 /* Check to make sure the local file header is correct */
813 fseek(fp, file_offset, 0);
814 l = PyMarshal_ReadLongFromFile(fp);
815 if (l != 0x04034B50) {
816 /* Bad: Local File Header */
817 PyErr_Format(ZipImportError,
818 "bad local file header in %s",
819 archive);
820 fclose(fp);
821 return NULL;
822 }
823 fseek(fp, file_offset + 26, 0);
824 l = 30 + PyMarshal_ReadShortFromFile(fp) +
825 PyMarshal_ReadShortFromFile(fp); /* local header size */
826 file_offset += l; /* Start of file data */
827
828 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
829 data_size : data_size + 1);
830 if (raw_data == NULL) {
831 fclose(fp);
832 return NULL;
833 }
834 buf = PyString_AsString(raw_data);
835
836 err = fseek(fp, file_offset, 0);
837 if (err == 0)
838 bytes_read = fread(buf, 1, data_size, fp);
839 fclose(fp);
840 if (err || bytes_read != data_size) {
841 PyErr_SetString(PyExc_IOError,
842 "zipimport: can't read data");
843 Py_DECREF(raw_data);
844 return NULL;
845 }
846
847 if (compress != 0) {
848 buf[data_size] = 'Z'; /* saw this in zipfile.py */
849 data_size++;
850 }
851 buf[data_size] = '\0';
852
853 if (compress == 0) /* data is not compressed */
854 return raw_data;
855
856 /* Decompress with zlib */
857 decompress = get_decompress_func();
858 if (decompress == NULL) {
859 PyErr_SetString(ZipImportError,
860 "can't decompress data; "
861 "zlib not available");
862 goto error;
863 }
864 data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
865error:
866 Py_DECREF(raw_data);
867 return data;
868}
869
/* Lenient time stamp comparison: return true when t1 and t2 differ by
   at most one second.  The mtime kept in the archive has a lower
   precision than the one stored in a .pyc (dostime only stores even
   seconds), so an exact match cannot be demanded. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return ((delta < 0) ? -delta : delta) <= 1;
}
882
883/* Given the contents of a .py[co] file in a buffer, unmarshal the data
884 and return the code object. Return None if it the magic word doesn't
885 match (we do this instead of raising an exception as we fall back
886 to .py if available and we don't want to mask other errors).
887 Returns a new reference. */
888static PyObject *
889unmarshal_code(char *pathname, PyObject *data, time_t mtime)
890{
891 PyObject *code;
892 char *buf = PyString_AsString(data);
893 int size = PyString_Size(data);
894
895 if (size <= 9) {
896 PyErr_SetString(ZipImportError,
897 "bad pyc data");
898 return NULL;
899 }
900
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000901 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000902 if (Py_VerboseFlag)
903 PySys_WriteStderr("# %s has bad magic\n",
904 pathname);
905 Py_INCREF(Py_None);
906 return Py_None; /* signal caller to try alternative */
907 }
908
Just van Rossum9a3129c2003-01-03 11:18:56 +0000909 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
910 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000911 if (Py_VerboseFlag)
912 PySys_WriteStderr("# %s has bad mtime\n",
913 pathname);
914 Py_INCREF(Py_None);
915 return Py_None; /* signal caller to try alternative */
916 }
917
918 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
919 if (code == NULL)
920 return NULL;
921 if (!PyCode_Check(code)) {
922 Py_DECREF(code);
923 PyErr_Format(PyExc_TypeError,
924 "compiled module %.200s is not a code object",
925 pathname);
926 return NULL;
927 }
928 return code;
929}
930
931/* Replace any occurances of "\r\n?" in the input string with "\n".
932 This converts DOS and Mac line endings to Unix line endings.
933 Also append a trailing "\n" to be compatible with
934 PyParser_SimpleParseFile(). Returns a new reference. */
935static PyObject *
936normalize_line_endings(PyObject *source)
937{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000938 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000939 PyObject *fixed_source;
940
Just van Rossum9a3129c2003-01-03 11:18:56 +0000941 /* one char extra for trailing \n and one for terminating \0 */
942 buf = PyMem_Malloc(PyString_Size(source) + 2);
943 if (buf == NULL) {
944 PyErr_SetString(PyExc_MemoryError,
945 "zipimport: no memory to allocate "
946 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000947 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000948 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000949 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000950 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000951 if (*p == '\r') {
952 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000953 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000954 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000955 }
956 else
957 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000958 }
959 *q++ = '\n'; /* add trailing \n */
960 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 fixed_source = PyString_FromString(buf);
962 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 return fixed_source;
964}
965
966/* Given a string buffer containing Python source code, compile it
967 return and return a code object as a new reference. */
968static PyObject *
969compile_source(char *pathname, PyObject *source)
970{
971 PyObject *code, *fixed_source;
972
973 fixed_source = normalize_line_endings(source);
974 if (fixed_source == NULL)
975 return NULL;
976
977 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
978 Py_file_input);
979 Py_DECREF(fixed_source);
980 return code;
981}
982
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-12), bits 9-15 years
            since 1980.

   The fields are interpreted as local time; whatever mktime() returns
   is returned unchanged. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* Start from an all-zero struct so that no member -- including any
       platform-specific extras -- is handed to mktime() uninitialized. */
    struct tm stm = {0};

    stm.tm_sec = (dostime & 0x1f) * 2;
    stm.tm_min = (dostime >> 5) & 0x3f;
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_mday = dosdate & 0x1f;
    stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    stm.tm_isdst = -1; /* let mktime() decide; wday/yday is ignored */

    return mktime(&stm);
}
1000
1001/* Given a path to a .pyc or .pyo file in the archive, return the
1002 modifictaion time of the matching .py file, or 0 if no source
1003 is available. */
1004static time_t
1005get_mtime_of_source(ZipImporter *self, char *path)
1006{
1007 PyObject *toc_entry;
1008 time_t mtime = 0;
1009 int lastchar = strlen(path) - 1;
1010 char savechar = path[lastchar];
1011 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1012 toc_entry = PyDict_GetItemString(self->files, path);
1013 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1014 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001015 /* fetch the time stamp of the .py file for comparison
1016 with an embedded pyc time stamp */
1017 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001018 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1019 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1020 mtime = parse_dostime(time, date);
1021 }
1022 path[lastchar] = savechar;
1023 return mtime;
1024}
1025
1026/* Return the code object for the module named by 'fullname' from the
1027 Zip archive as a new reference. */
1028static PyObject *
1029get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1030 time_t mtime, PyObject *toc_entry)
1031{
1032 PyObject *data, *code;
1033 char *modpath;
1034 char *archive = PyString_AsString(self->archive);
1035
1036 if (archive == NULL)
1037 return NULL;
1038
1039 data = get_data(archive, toc_entry);
1040 if (data == NULL)
1041 return NULL;
1042
1043 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1044
1045 if (isbytecode) {
1046 code = unmarshal_code(modpath, data, mtime);
1047 }
1048 else {
1049 code = compile_source(modpath, data);
1050 }
1051 Py_DECREF(data);
1052 return code;
1053}
1054
1055/* Get the code object assoiciated with the module specified by
1056 'fullname'. */
1057static PyObject *
1058get_module_code(ZipImporter *self, char *fullname,
1059 int *p_ispackage, char **p_modpath)
1060{
1061 PyObject *toc_entry;
1062 char *subname, path[MAXPATHLEN + 1];
1063 int len;
1064 struct st_zip_searchorder *zso;
1065
1066 subname = get_subname(fullname);
1067
1068 len = make_filename(PyString_AsString(self->prefix), subname, path);
1069 if (len < 0)
1070 return NULL;
1071
1072 for (zso = zip_searchorder; *zso->suffix; zso++) {
1073 PyObject *code = NULL;
1074
1075 strcpy(path + len, zso->suffix);
1076 if (Py_VerboseFlag > 1)
1077 PySys_WriteStderr("# trying %s%c%s\n",
1078 PyString_AsString(self->archive),
1079 SEP, path);
1080 toc_entry = PyDict_GetItemString(self->files, path);
1081 if (toc_entry != NULL) {
1082 time_t mtime = 0;
1083 int ispackage = zso->type & IS_PACKAGE;
1084 int isbytecode = zso->type & IS_BYTECODE;
1085
1086 if (isbytecode)
1087 mtime = get_mtime_of_source(self, path);
1088 if (p_ispackage != NULL)
1089 *p_ispackage = ispackage;
1090 code = get_code_from_data(self, ispackage,
1091 isbytecode, mtime,
1092 toc_entry);
1093 if (code == Py_None) {
1094 /* bad magic number or non-matching mtime
1095 in byte code, try next */
1096 Py_DECREF(code);
1097 continue;
1098 }
1099 if (code != NULL && p_modpath != NULL)
1100 *p_modpath = PyString_AsString(
1101 PyTuple_GetItem(toc_entry, 0));
1102 return code;
1103 }
1104 }
1105 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1106 return NULL;
1107}
1108
1109
1110/* Module init */
1111
1112PyDoc_STRVAR(zipimport_doc,
1113"zipimport provides support for importing Python modules from Zip archives.\n\
1114\n\
1115This module exports three objects:\n\
1116- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1117- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1118 subclass of ImportError, so it can be caught as ImportError, too.\n\
1119- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1120 info dicts, as used in zipimporter._files.\n\
1121\n\
1122It is usually not needed to use the zipimport module explicitly; it is\n\
1123used by the builtin import mechanism for sys.path items that are paths\n\
1124to Zip archives.");
1125
1126PyMODINIT_FUNC
1127initzipimport(void)
1128{
1129 PyObject *mod;
1130
1131 if (PyType_Ready(&ZipImporter_Type) < 0)
1132 return;
1133
1134 /* Correct directory separator */
1135 zip_searchorder[0].suffix[0] = SEP;
1136 zip_searchorder[1].suffix[0] = SEP;
1137 zip_searchorder[2].suffix[0] = SEP;
1138 if (Py_OptimizeFlag) {
1139 /* Reverse *.pyc and *.pyo */
1140 struct st_zip_searchorder tmp;
1141 tmp = zip_searchorder[0];
1142 zip_searchorder[0] = zip_searchorder[1];
1143 zip_searchorder[1] = tmp;
1144 tmp = zip_searchorder[3];
1145 zip_searchorder[3] = zip_searchorder[4];
1146 zip_searchorder[4] = tmp;
1147 }
1148
1149 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1150 NULL, PYTHON_API_VERSION);
1151
1152 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1153 PyExc_ImportError, NULL);
1154 if (ZipImportError == NULL)
1155 return;
1156
1157 Py_INCREF(ZipImportError);
1158 if (PyModule_AddObject(mod, "ZipImportError",
1159 ZipImportError) < 0)
1160 return;
1161
1162 Py_INCREF(&ZipImporter_Type);
1163 if (PyModule_AddObject(mod, "zipimporter",
1164 (PyObject *)&ZipImporter_Type) < 0)
1165 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001166
Just van Rossum52e14d62002-12-30 22:08:05 +00001167 zip_directory_cache = PyDict_New();
1168 if (zip_directory_cache == NULL)
1169 return;
1170 Py_INCREF(zip_directory_cache);
1171 if (PyModule_AddObject(mod, "_zip_directory_cache",
1172 zip_directory_cache) < 0)
1173 return;
1174}