blob: 355f69c1269e5a6cf20fec89c54ecbc37a0560e6 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the search table: a file name suffix and the IS_* flags
   describing the archive entry that suffix names. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* The table is walked front to back when looking up a module in the
   Zip archive: package __init__ files are tried first, then plain
   .pyc, .pyo and .py entries.  initzipimport() swaps the .pyc and
   .pyo rows when running in optimized mode and overwrites the leading
   '/' with SEP.  The row with the empty suffix ends the table. */
struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.pyo", IS_BYTECODE | IS_PACKAGE},
    {"/__init__.py",  IS_SOURCE | IS_PACKAGE},
    {".pyc",          IS_BYTECODE},
    {".pyo",          IS_BYTECODE},
    {".py",           IS_SOURCE},
    {"",              0}
};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38 PyObject_HEAD
39 PyObject *archive; /* pathname of the Zip archive */
40 PyObject *prefix; /* file prefix: "a/sub/directory/" */
41 PyObject *files; /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
68 if (!PyArg_ParseTuple(args, "s:zipimporter",
69 &path))
70 return -1;
71
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
76 }
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
81 }
82 strcpy(buf, path);
83
84#ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
88 }
89#endif
90
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
94 struct stat statbuf;
95 int rv;
96
97 rv = stat(buf, &statbuf);
98 if (rv == 0) {
99 /* it exists */
100 if (S_ISREG(statbuf.st_mode))
101 /* it's a file */
102 path = buf;
103 break;
104 }
105 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000106 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000107 if (prefix != NULL)
108 *prefix = SEP;
109 if (p == NULL)
110 break;
111 *p = '\0';
112 prefix = p;
113 }
114 if (path != NULL) {
115 PyObject *files;
116 files = PyDict_GetItemString(zip_directory_cache, path);
117 if (files == NULL) {
118 files = read_directory(buf);
119 if (files == NULL)
120 return -1;
121 if (PyDict_SetItemString(zip_directory_cache, path,
122 files) != 0)
123 return -1;
124 }
125 else
126 Py_INCREF(files);
127 self->files = files;
128 }
129 else {
130 PyErr_SetString(ZipImportError, "not a Zip file");
131 return -1;
132 }
133
134 if (prefix == NULL)
135 prefix = "";
136 else {
137 prefix++;
138 len = strlen(prefix);
139 if (prefix[len-1] != SEP) {
140 /* add trailing SEP */
141 prefix[len] = SEP;
142 prefix[len + 1] = '\0';
143 }
144 }
145
146 self->archive = PyString_FromString(buf);
147 if (self->archive == NULL)
148 return -1;
149
150 self->prefix = PyString_FromString(prefix);
151 if (self->prefix == NULL)
152 return -1;
153
154 return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161 ZipImporter *self = (ZipImporter *)obj;
162 int err;
163
164 if (self->files != NULL) {
165 err = visit(self->files, arg);
166 if (err)
167 return err;
168 }
169 return 0;
170}
171
172static void
173zipimporter_dealloc(ZipImporter *self)
174{
175 PyObject_GC_UnTrack(self);
176 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000177 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000178 Py_XDECREF(self->files);
179 self->ob_type->tp_free((PyObject *)self);
180}
181
182static PyObject *
183zipimporter_repr(ZipImporter *self)
184{
185 char buf[500];
186 char *archive = "???";
187 char *prefix = "";
188
189 if (self->archive != NULL && PyString_Check(self->archive))
190 archive = PyString_AsString(self->archive);
191 if (self->prefix != NULL && PyString_Check(self->prefix))
192 prefix = PyString_AsString(self->prefix);
193 if (prefix != NULL && *prefix)
194 PyOS_snprintf(buf, sizeof(buf),
195 "<zipimporter object \"%.300s%c%.150s\">",
196 archive, SEP, prefix);
197 else
198 PyOS_snprintf(buf, sizeof(buf),
199 "<zipimporter object \"%.300s\">",
200 archive);
201 return PyString_FromString(buf);
202}
203
/* Return a pointer to the part of fullname after its last '.', or
   fullname itself when it contains no dot.  The result points into
   the caller's buffer; nothing is copied. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
215
216/* Given a (sub)modulename, write the potential file path in the
217 archive (without extension) to the path buffer. Return the
218 length of the resulting string. */
219static int
220make_filename(char *prefix, char *name, char *path)
221{
222 int len;
223 char *p;
224
225 len = strlen(prefix);
226
227 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
228 if (len + strlen(name) + 13 >= MAXPATHLEN) {
229 PyErr_SetString(ZipImportError, "path too long");
230 return -1;
231 }
232
233 strcpy(path, prefix);
234 strcpy(path + len, name);
235 for (p = path + len; *p; p++) {
236 if (*p == '.')
237 *p = SEP;
238 }
239 len += strlen(name);
240 return len;
241}
242
/* Result codes of get_module_info(). */
enum module_info {
    MI_ERROR,       /* lookup failed, an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found, entry is a plain module */
    MI_PACKAGE      /* found, entry is a package __init__ */
};
249
250/* Return some information about a module. */
251static enum module_info
252get_module_info(ZipImporter *self, char *fullname)
253{
254 char *subname, path[MAXPATHLEN + 1];
255 int len;
256 struct st_zip_searchorder *zso;
257
258 subname = get_subname(fullname);
259
260 len = make_filename(PyString_AsString(self->prefix), subname, path);
261 if (len < 0)
262 return MI_ERROR;
263
264 for (zso = zip_searchorder; *zso->suffix; zso++) {
265 strcpy(path + len, zso->suffix);
266 if (PyDict_GetItemString(self->files, path) != NULL) {
267 if (zso->type & IS_PACKAGE)
268 return MI_PACKAGE;
269 else
270 return MI_MODULE;
271 }
272 }
273 return MI_NOT_FOUND;
274}
275
276/* Check whether we can satisfy the import of the module named by
277 'fullname'. Return self if we can, None if we can't. */
278static PyObject *
279zipimporter_find_module(PyObject *obj, PyObject *args)
280{
281 ZipImporter *self = (ZipImporter *)obj;
282 PyObject *path = NULL;
283 char *fullname;
284 enum module_info mi;
285
286 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
287 &fullname, &path))
288 return NULL;
289
290 mi = get_module_info(self, fullname);
291 if (mi == MI_ERROR)
292 return NULL;
293 if (mi == MI_NOT_FOUND) {
294 Py_INCREF(Py_None);
295 return Py_None;
296 }
297 Py_INCREF(self);
298 return (PyObject *)self;
299}
300
301/* Load and return the module named by 'fullname'. */
302static PyObject *
303zipimporter_load_module(PyObject *obj, PyObject *args)
304{
305 ZipImporter *self = (ZipImporter *)obj;
306 PyObject *code, *mod, *dict;
307 char *fullname, *modpath;
308 int ispackage;
309
310 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
311 &fullname))
312 return NULL;
313
314 code = get_module_code(self, fullname, &ispackage, &modpath);
315 if (code == NULL)
316 return NULL;
317
318 mod = PyImport_AddModule(fullname);
319 if (mod == NULL) {
320 Py_DECREF(code);
321 return NULL;
322 }
323 dict = PyModule_GetDict(mod);
324
325 /* mod.__loader__ = self */
326 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
327 goto error;
328
329 if (ispackage) {
330 /* add __path__ to the module *before* the code gets
331 executed */
332 PyObject *pkgpath, *fullpath;
333 char *prefix = PyString_AsString(self->prefix);
334 char *subname = get_subname(fullname);
335 int err;
336
337 fullpath = PyString_FromFormat("%s%c%s%s",
338 PyString_AsString(self->archive),
339 SEP,
340 *prefix ? prefix : "",
341 subname);
342 if (fullpath == NULL)
343 goto error;
344
345 pkgpath = Py_BuildValue("[O]", fullpath);
346 Py_DECREF(fullpath);
347 if (pkgpath == NULL)
348 goto error;
349 err = PyDict_SetItemString(dict, "__path__", pkgpath);
350 Py_DECREF(pkgpath);
351 if (err != 0)
352 goto error;
353 }
354 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
355 Py_DECREF(code);
356 if (Py_VerboseFlag)
357 PySys_WriteStderr("import %s # loaded from Zip %s\n",
358 fullname, modpath);
359 return mod;
360error:
361 Py_DECREF(code);
362 Py_DECREF(mod);
363 return NULL;
364}
365
366/* Return a bool signifying whether the module is a package or not. */
367static PyObject *
368zipimporter_is_package(PyObject *obj, PyObject *args)
369{
370 ZipImporter *self = (ZipImporter *)obj;
371 char *fullname;
372 enum module_info mi;
373
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000374 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000375 &fullname))
376 return NULL;
377
378 mi = get_module_info(self, fullname);
379 if (mi == MI_ERROR)
380 return NULL;
381 if (mi == MI_NOT_FOUND) {
382 PyErr_Format(ZipImportError, "can't find module '%.200s'",
383 fullname);
384 return NULL;
385 }
386 return PyBool_FromLong(mi == MI_PACKAGE);
387}
388
389static PyObject *
390zipimporter_get_data(PyObject *obj, PyObject *args)
391{
392 ZipImporter *self = (ZipImporter *)obj;
393 char *path;
394#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000395 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000396#endif
397 PyObject *toc_entry;
398 int len;
399
400 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
401 return NULL;
402
403#ifdef ALTSEP
404 if (strlen(path) >= MAXPATHLEN) {
405 PyErr_SetString(ZipImportError, "path too long");
406 return NULL;
407 }
408 strcpy(buf, path);
409 for (p = buf; *p; p++) {
410 if (*p == ALTSEP)
411 *p = SEP;
412 }
413 path = buf;
414#endif
415 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000416 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000417 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
418 path[len] == SEP) {
419 path = path + len + 1;
420 }
421
422 toc_entry = PyDict_GetItemString(self->files, path);
423 if (toc_entry == NULL) {
424 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
425 path);
426 return NULL;
427 }
428 return get_data(PyString_AsString(self->archive), toc_entry);
429}
430
431static PyObject *
432zipimporter_get_code(PyObject *obj, PyObject *args)
433{
434 ZipImporter *self = (ZipImporter *)obj;
435 char *fullname;
436
437 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
438 return NULL;
439
440 return get_module_code(self, fullname, NULL, NULL);
441}
442
443static PyObject *
444zipimporter_get_source(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 PyObject *toc_entry;
448 char *fullname, *subname, path[MAXPATHLEN+1];
449 int len;
450 enum module_info mi;
451
452 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
453 return NULL;
454
455 mi = get_module_info(self, fullname);
456 if (mi == MI_ERROR)
457 return NULL;
458 if (mi == MI_NOT_FOUND) {
459 PyErr_Format(ZipImportError, "can't find module '%.200s'",
460 fullname);
461 return NULL;
462 }
463 subname = get_subname(fullname);
464
465 len = make_filename(PyString_AsString(self->prefix), subname, path);
466 if (len < 0)
467 return NULL;
468
469 if (mi == MI_PACKAGE) {
470 path[len] = SEP;
471 strcpy(path + len + 1, "__init__.py");
472 }
473 else
474 strcpy(path + len, ".py");
475
476 toc_entry = PyDict_GetItemString(self->files, path);
477 if (toc_entry != NULL)
478 return get_data(PyString_AsString(self->archive), toc_entry);
479
480 /* we have the module, but no source */
481 Py_INCREF(Py_None);
482 return Py_None;
483}
484
485PyDoc_STRVAR(doc_find_module,
486"find_module(fullname, path=None) -> self or None.\n\
487\n\
488Search for a module specified by 'fullname'. 'fullname' must be the\n\
489fully qualified (dotted) module name. It returns the zipimporter\n\
490instance itself if the module was found, or None if it wasn't.\n\
491The optional 'path' argument is ignored -- it's there for compatibility\n\
492with the importer protocol.");
493
494PyDoc_STRVAR(doc_load_module,
495"load_module(fullname) -> module.\n\
496\n\
497Load the module specified by 'fullname'. 'fullname' must be the\n\
498fully qualified (dotted) module name. It returns the imported\n\
499module, or raises ZipImportError if it wasn't found.");
500
501PyDoc_STRVAR(doc_get_data,
502"get_data(pathname) -> string with file data.\n\
503\n\
504Return the data associated with 'pathname'. Raise IOError if\n\
505the file wasn't found.");
506
507PyDoc_STRVAR(doc_is_package,
508"is_package(fullname) -> bool.\n\
509\n\
510Return True if the module specified by fullname is a package.\n\
511Raise ZipImportError is the module couldn't be found.");
512
513PyDoc_STRVAR(doc_get_code,
514"get_code(fullname) -> code object.\n\
515\n\
516Return the code object for the specified module. Raise ZipImportError\n\
517is the module couldn't be found.");
518
519PyDoc_STRVAR(doc_get_source,
520"get_source(fullname) -> source string.\n\
521\n\
522Return the source code for the specified module. Raise ZipImportError\n\
523is the module couldn't be found, return None if the archive does\n\
524contain the module, but has no source for it.");
525
526static PyMethodDef zipimporter_methods[] = {
527 {"find_module", zipimporter_find_module, METH_VARARGS,
528 doc_find_module},
529 {"load_module", zipimporter_load_module, METH_VARARGS,
530 doc_load_module},
531 {"get_data", zipimporter_get_data, METH_VARARGS,
532 doc_get_data},
533 {"get_code", zipimporter_get_code, METH_VARARGS,
534 doc_get_code},
535 {"get_source", zipimporter_get_source, METH_VARARGS,
536 doc_get_source},
537 {"is_package", zipimporter_is_package, METH_VARARGS,
538 doc_is_package},
539 {NULL, NULL} /* sentinel */
540};
541
542static PyMemberDef zipimporter_members[] = {
543 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
544 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
545 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
546 {NULL}
547};
548
549PyDoc_STRVAR(zipimporter_doc,
550"zipimporter(archivepath) -> zipimporter object\n\
551\n\
552Create a new zipimporter instance. 'archivepath' must be a path to\n\
553a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
554a valid Zip archive.");
555
556#define DEFERRED_ADDRESS(ADDR) 0
557
558static PyTypeObject ZipImporter_Type = {
559 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
560 0,
561 "zipimport.zipimporter",
562 sizeof(ZipImporter),
563 0, /* tp_itemsize */
564 (destructor)zipimporter_dealloc, /* tp_dealloc */
565 0, /* tp_print */
566 0, /* tp_getattr */
567 0, /* tp_setattr */
568 0, /* tp_compare */
569 (reprfunc)zipimporter_repr, /* tp_repr */
570 0, /* tp_as_number */
571 0, /* tp_as_sequence */
572 0, /* tp_as_mapping */
573 0, /* tp_hash */
574 0, /* tp_call */
575 0, /* tp_str */
576 PyObject_GenericGetAttr, /* tp_getattro */
577 0, /* tp_setattro */
578 0, /* tp_as_buffer */
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
580 Py_TPFLAGS_HAVE_GC, /* tp_flags */
581 zipimporter_doc, /* tp_doc */
582 zipimporter_traverse, /* tp_traverse */
583 0, /* tp_clear */
584 0, /* tp_richcompare */
585 0, /* tp_weaklistoffset */
586 0, /* tp_iter */
587 0, /* tp_iternext */
588 zipimporter_methods, /* tp_methods */
589 zipimporter_members, /* tp_members */
590 0, /* tp_getset */
591 0, /* tp_base */
592 0, /* tp_dict */
593 0, /* tp_descr_get */
594 0, /* tp_descr_set */
595 0, /* tp_dictoffset */
596 (initproc)zipimporter_init, /* tp_init */
597 PyType_GenericAlloc, /* tp_alloc */
598 PyType_GenericNew, /* tp_new */
599 PyObject_GC_Del, /* tp_free */
600};
601
602
603/* implementation */
604
/* Decode the first 4 bytes of buf as a little-endian integer and
   return it as a long.  Where SIZEOF_LONG is greater than 4, bit 31
   is propagated into the upper bits so the result is the signed
   32-bit value.  This partially reimplements marshal.c:r_long(). */
static long
get_long(unsigned char *buf)
{
    long value = 0;
    int i;

    /* byte i contributes bits 8*i .. 8*i+7 */
    for (i = 0; i < 4; i++)
        value |= (long)buf[i] << (8 * i);
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
621
622/*
623 read_directory(archive) -> files dict (new reference)
624
625 Given a path to a Zip archive, build a dict, mapping file names
626 (local to the archive, using SEP as a separator) to toc entries.
627
628 A toc_entry is a tuple:
629
630 (compress, # compression kind; 0 for uncompressed
631 data_size, # size of compressed data on disk
632 file_size, # size of decompressed data
633 file_offset, # offset of file header from start of archive
634 time, # mod time of file (in dos format)
635 date, # mod data of file (in dos format)
636 crc, # crc checksum of the data
637 )
638
639 Directories can be recognized by the trailing SEP in the name,
640 data_size and file_offset are 0.
641*/
642static PyObject *
643read_directory(char *archive)
644{
645 PyObject *files = NULL;
646 FILE *fp;
647 long compress, crc, data_size, file_size, file_offset, date, time;
648 long header_offset, name_size, header_size, header_end;
649 long i, l, length, count;
650 char path[MAXPATHLEN + 5];
651 char name[MAXPATHLEN + 5];
652 char *p, endof_central_dir[22];
653
654 if (strlen(archive) > MAXPATHLEN) {
655 PyErr_SetString(PyExc_OverflowError,
656 "Zip path name is too long");
657 return NULL;
658 }
659 strcpy(path, archive);
660
661 fp = fopen(archive, "rb");
662 if (fp == NULL) {
663 PyErr_Format(ZipImportError, "can't open Zip file: "
664 "'%.200s'", archive);
665 return NULL;
666 }
667 fseek(fp, -22, 2); /* Seek from end of file */
668 header_end = ftell(fp);
669 if (fread(endof_central_dir, 1, 22, fp) != 22) {
670 fclose(fp);
671 PyErr_Format(ZipImportError, "can't read Zip file: "
672 "'%.200s'", archive);
673 return NULL;
674 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000675 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000676 /* Bad: End of Central Dir signature */
677 fclose(fp);
678 PyErr_Format(ZipImportError, "not a Zip file: "
679 "'%.200s'", archive);
680 return NULL;
681 }
682
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000683 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Just van Rossum52e14d62002-12-30 22:08:05 +0000684
685 files = PyDict_New();
686 if (files == NULL)
687 goto error;
688
689 length = (long)strlen(path);
690 path[length] = SEP;
691
692 /* Start of Central Directory */
693 count = 0;
694 for (;;) {
695 PyObject *t;
696 int err;
697
698 fseek(fp, header_offset, 0); /* Start of file header */
699 l = PyMarshal_ReadLongFromFile(fp);
700 if (l != 0x02014B50)
701 break; /* Bad: Central Dir File Header */
702 fseek(fp, header_offset + 10, 0);
703 compress = PyMarshal_ReadShortFromFile(fp);
704 time = PyMarshal_ReadShortFromFile(fp);
705 date = PyMarshal_ReadShortFromFile(fp);
706 crc = PyMarshal_ReadLongFromFile(fp);
707 data_size = PyMarshal_ReadLongFromFile(fp);
708 file_size = PyMarshal_ReadLongFromFile(fp);
709 name_size = PyMarshal_ReadShortFromFile(fp);
710 header_size = 46 + name_size +
711 PyMarshal_ReadShortFromFile(fp) +
712 PyMarshal_ReadShortFromFile(fp);
713 fseek(fp, header_offset + 42, 0);
714 file_offset = PyMarshal_ReadLongFromFile(fp);
715 if (name_size > MAXPATHLEN)
716 name_size = MAXPATHLEN;
717
718 p = name;
719 for (i = 0; i < name_size; i++) {
720 *p = (char)getc(fp);
721 if (*p == '/')
722 *p = SEP;
723 p++;
724 }
725 *p = 0; /* Add terminating null byte */
726 header_offset += header_size;
727
728 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730 t = Py_BuildValue("siiiiiii", path, compress, data_size,
731 file_size, file_offset, time, date, crc);
732 if (t == NULL)
733 goto error;
734 err = PyDict_SetItemString(files, name, t);
735 Py_DECREF(t);
736 if (err != 0)
737 goto error;
738 count++;
739 }
740 fclose(fp);
741 if (Py_VerboseFlag)
742 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743 count, archive);
744 return files;
745error:
746 fclose(fp);
747 Py_XDECREF(files);
748 return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752 be imported. The function is cached when found, so subsequent calls
753 don't import zlib again. Returns a *borrowed* reference.
754 XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758 static PyObject *decompress = NULL;
759
760 if (decompress == NULL) {
761 PyObject *zlib;
762 static int importing_zlib = 0;
763
764 if (importing_zlib != 0)
765 /* Someone has a zlib.py[co] in their Zip file;
766 let's avoid a stack overflow. */
767 return NULL;
768 importing_zlib = 1;
769 zlib = PyImport_ImportModule("zlib"); /* import zlib */
770 importing_zlib = 0;
771 if (zlib != NULL) {
772 decompress = PyObject_GetAttrString(zlib,
773 "decompress");
774 Py_DECREF(zlib);
775 }
776 else
777 PyErr_Clear();
778 if (Py_VerboseFlag)
779 PySys_WriteStderr("# zipimport: zlib %s\n",
780 zlib != NULL ? "available": "UNAVAILABLE");
781 }
782 return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786 data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790 PyObject *raw_data, *data = NULL, *decompress;
791 char *buf;
792 FILE *fp;
793 int err, bytes_read = 0;
794 long l;
795 char *datapath;
796 long compress, data_size, file_size, file_offset;
797 long time, date, crc;
798
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000799 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000800 &data_size, &file_size, &file_offset, &time,
801 &date, &crc)) {
802 return NULL;
803 }
804
805 fp = fopen(archive, "rb");
806 if (!fp) {
807 PyErr_Format(PyExc_IOError,
808 "zipimport: can not open file %s", archive);
809 return NULL;
810 }
811
812 /* Check to make sure the local file header is correct */
813 fseek(fp, file_offset, 0);
814 l = PyMarshal_ReadLongFromFile(fp);
815 if (l != 0x04034B50) {
816 /* Bad: Local File Header */
817 PyErr_Format(ZipImportError,
818 "bad local file header in %s",
819 archive);
820 fclose(fp);
821 return NULL;
822 }
823 fseek(fp, file_offset + 26, 0);
824 l = 30 + PyMarshal_ReadShortFromFile(fp) +
825 PyMarshal_ReadShortFromFile(fp); /* local header size */
826 file_offset += l; /* Start of file data */
827
828 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
829 data_size : data_size + 1);
830 if (raw_data == NULL) {
831 fclose(fp);
832 return NULL;
833 }
834 buf = PyString_AsString(raw_data);
835
836 err = fseek(fp, file_offset, 0);
837 if (err == 0)
838 bytes_read = fread(buf, 1, data_size, fp);
839 fclose(fp);
840 if (err || bytes_read != data_size) {
841 PyErr_SetString(PyExc_IOError,
842 "zipimport: can't read data");
843 Py_DECREF(raw_data);
844 return NULL;
845 }
846
847 if (compress != 0) {
848 buf[data_size] = 'Z'; /* saw this in zipfile.py */
849 data_size++;
850 }
851 buf[data_size] = '\0';
852
853 if (compress == 0) /* data is not compressed */
854 return raw_data;
855
856 /* Decompress with zlib */
857 decompress = get_decompress_func();
858 if (decompress == NULL) {
859 PyErr_SetString(ZipImportError,
860 "can't decompress data; "
861 "zlib not available");
862 goto error;
863 }
864 data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
865error:
866 Py_DECREF(raw_data);
867 return data;
868}
869
/* Lenient time stamp comparison: return 1 when t1 and t2 are at most
   one second apart, 0 otherwise.  The tolerance is needed because
   dostime only stores even seconds, so the mtime recorded in the
   archive is less precise than the one stored in a .pyc. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
882
883/* Given the contents of a .py[co] file in a buffer, unmarshal the data
884 and return the code object. Return None if it the magic word doesn't
885 match (we do this instead of raising an exception as we fall back
886 to .py if available and we don't want to mask other errors).
887 Returns a new reference. */
888static PyObject *
889unmarshal_code(char *pathname, PyObject *data, time_t mtime)
890{
891 PyObject *code;
892 char *buf = PyString_AsString(data);
893 int size = PyString_Size(data);
894
895 if (size <= 9) {
896 PyErr_SetString(ZipImportError,
897 "bad pyc data");
898 return NULL;
899 }
900
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000901 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000902 if (Py_VerboseFlag)
903 PySys_WriteStderr("# %s has bad magic\n",
904 pathname);
905 Py_INCREF(Py_None);
906 return Py_None; /* signal caller to try alternative */
907 }
908
Just van Rossum9a3129c2003-01-03 11:18:56 +0000909 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
910 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000911 if (Py_VerboseFlag)
912 PySys_WriteStderr("# %s has bad mtime\n",
913 pathname);
914 Py_INCREF(Py_None);
915 return Py_None; /* signal caller to try alternative */
916 }
917
918 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
919 if (code == NULL)
920 return NULL;
921 if (!PyCode_Check(code)) {
922 Py_DECREF(code);
923 PyErr_Format(PyExc_TypeError,
924 "compiled module %.200s is not a code object",
925 pathname);
926 return NULL;
927 }
928 return code;
929}
930
931/* Replace any occurances of "\r\n?" in the input string with "\n".
932 This converts DOS and Mac line endings to Unix line endings.
933 Also append a trailing "\n" to be compatible with
934 PyParser_SimpleParseFile(). Returns a new reference. */
935static PyObject *
936normalize_line_endings(PyObject *source)
937{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000938 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000939 PyObject *fixed_source;
940
Just van Rossum9a3129c2003-01-03 11:18:56 +0000941 /* one char extra for trailing \n and one for terminating \0 */
942 buf = PyMem_Malloc(PyString_Size(source) + 2);
943 if (buf == NULL) {
944 PyErr_SetString(PyExc_MemoryError,
945 "zipimport: no memory to allocate "
946 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000947 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000948 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000949 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000950 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000951 if (*p == '\r') {
952 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000953 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000954 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000955 }
956 else
957 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000958 }
959 *q++ = '\n'; /* add trailing \n */
960 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 fixed_source = PyString_FromString(buf);
962 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 return fixed_source;
964}
965
966/* Given a string buffer containing Python source code, compile it
967 return and return a code object as a new reference. */
968static PyObject *
969compile_source(char *pathname, PyObject *source)
970{
971 PyObject *code, *fixed_source;
972
973 fixed_source = normalize_line_endings(source);
974 if (fixed_source == NULL)
975 return NULL;
976
977 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
978 Py_file_input);
979 Py_DECREF(fixed_source);
980 return code;
981}
982
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs seconds/2 in bits 0-4, minutes in bits 5-10 and hours
   in bits 11-15; dosdate packs the day in bits 0-4, the month (1-12)
   in bits 5-8 and the year minus 1980 in bits 9-15.  The fields are
   interpreted as local time and converted with mktime(), whose result
   (possibly (time_t)-1 on failure) is returned unchanged. */
time_t parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* clear every member, including platform-specific ones that are
       not assigned below, before handing the struct to mktime() */
    memset((void *)&stm, '\0', sizeof(stm));

    stm.tm_sec = (dostime & 0x1f) * 2;
    stm.tm_min = (dostime >> 5) & 0x3f;
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_mday = dosdate & 0x1f;
    stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    /* let mktime() work out whether DST applies; forcing 0 shifts
       time stamps taken during DST by an hour.
       wday/yday is ignored */
    stm.tm_isdst = -1;

    return mktime(&stm);
}
999
1000/* Given a path to a .pyc or .pyo file in the archive, return the
1001 modifictaion time of the matching .py file, or 0 if no source
1002 is available. */
1003static time_t
1004get_mtime_of_source(ZipImporter *self, char *path)
1005{
1006 PyObject *toc_entry;
1007 time_t mtime = 0;
1008 int lastchar = strlen(path) - 1;
1009 char savechar = path[lastchar];
1010 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1011 toc_entry = PyDict_GetItemString(self->files, path);
1012 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1013 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001014 /* fetch the time stamp of the .py file for comparison
1015 with an embedded pyc time stamp */
1016 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001017 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1018 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1019 mtime = parse_dostime(time, date);
1020 }
1021 path[lastchar] = savechar;
1022 return mtime;
1023}
1024
1025/* Return the code object for the module named by 'fullname' from the
1026 Zip archive as a new reference. */
1027static PyObject *
1028get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1029 time_t mtime, PyObject *toc_entry)
1030{
1031 PyObject *data, *code;
1032 char *modpath;
1033 char *archive = PyString_AsString(self->archive);
1034
1035 if (archive == NULL)
1036 return NULL;
1037
1038 data = get_data(archive, toc_entry);
1039 if (data == NULL)
1040 return NULL;
1041
1042 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1043
1044 if (isbytecode) {
1045 code = unmarshal_code(modpath, data, mtime);
1046 }
1047 else {
1048 code = compile_source(modpath, data);
1049 }
1050 Py_DECREF(data);
1051 return code;
1052}
1053
1054/* Get the code object assoiciated with the module specified by
1055 'fullname'. */
1056static PyObject *
1057get_module_code(ZipImporter *self, char *fullname,
1058 int *p_ispackage, char **p_modpath)
1059{
1060 PyObject *toc_entry;
1061 char *subname, path[MAXPATHLEN + 1];
1062 int len;
1063 struct st_zip_searchorder *zso;
1064
1065 subname = get_subname(fullname);
1066
1067 len = make_filename(PyString_AsString(self->prefix), subname, path);
1068 if (len < 0)
1069 return NULL;
1070
1071 for (zso = zip_searchorder; *zso->suffix; zso++) {
1072 PyObject *code = NULL;
1073
1074 strcpy(path + len, zso->suffix);
1075 if (Py_VerboseFlag > 1)
1076 PySys_WriteStderr("# trying %s%c%s\n",
1077 PyString_AsString(self->archive),
1078 SEP, path);
1079 toc_entry = PyDict_GetItemString(self->files, path);
1080 if (toc_entry != NULL) {
1081 time_t mtime = 0;
1082 int ispackage = zso->type & IS_PACKAGE;
1083 int isbytecode = zso->type & IS_BYTECODE;
1084
1085 if (isbytecode)
1086 mtime = get_mtime_of_source(self, path);
1087 if (p_ispackage != NULL)
1088 *p_ispackage = ispackage;
1089 code = get_code_from_data(self, ispackage,
1090 isbytecode, mtime,
1091 toc_entry);
1092 if (code == Py_None) {
1093 /* bad magic number or non-matching mtime
1094 in byte code, try next */
1095 Py_DECREF(code);
1096 continue;
1097 }
1098 if (code != NULL && p_modpath != NULL)
1099 *p_modpath = PyString_AsString(
1100 PyTuple_GetItem(toc_entry, 0));
1101 return code;
1102 }
1103 }
1104 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1105 return NULL;
1106}
1107
1108
1109/* Module init */
1110
1111PyDoc_STRVAR(zipimport_doc,
1112"zipimport provides support for importing Python modules from Zip archives.\n\
1113\n\
1114This module exports three objects:\n\
1115- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1116- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1117 subclass of ImportError, so it can be caught as ImportError, too.\n\
1118- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1119 info dicts, as used in zipimporter._files.\n\
1120\n\
1121It is usually not needed to use the zipimport module explicitly; it is\n\
1122used by the builtin import mechanism for sys.path items that are paths\n\
1123to Zip archives.");
1124
1125PyMODINIT_FUNC
1126initzipimport(void)
1127{
1128 PyObject *mod;
1129
1130 if (PyType_Ready(&ZipImporter_Type) < 0)
1131 return;
1132
1133 /* Correct directory separator */
1134 zip_searchorder[0].suffix[0] = SEP;
1135 zip_searchorder[1].suffix[0] = SEP;
1136 zip_searchorder[2].suffix[0] = SEP;
1137 if (Py_OptimizeFlag) {
1138 /* Reverse *.pyc and *.pyo */
1139 struct st_zip_searchorder tmp;
1140 tmp = zip_searchorder[0];
1141 zip_searchorder[0] = zip_searchorder[1];
1142 zip_searchorder[1] = tmp;
1143 tmp = zip_searchorder[3];
1144 zip_searchorder[3] = zip_searchorder[4];
1145 zip_searchorder[4] = tmp;
1146 }
1147
1148 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1149 NULL, PYTHON_API_VERSION);
1150
1151 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1152 PyExc_ImportError, NULL);
1153 if (ZipImportError == NULL)
1154 return;
1155
1156 Py_INCREF(ZipImportError);
1157 if (PyModule_AddObject(mod, "ZipImportError",
1158 ZipImportError) < 0)
1159 return;
1160
1161 Py_INCREF(&ZipImporter_Type);
1162 if (PyModule_AddObject(mod, "zipimporter",
1163 (PyObject *)&ZipImporter_Type) < 0)
1164 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001165
Just van Rossum52e14d62002-12-30 22:08:05 +00001166 zip_directory_cache = PyDict_New();
1167 if (zip_directory_cache == NULL)
1168 return;
1169 Py_INCREF(zip_directory_cache);
1170 if (PyModule_AddObject(mod, "_zip_directory_cache",
1171 zip_directory_cache) < 0)
1172 return;
1173}