blob: f0eaef5a64de1cc95a13e697759498c3967d422c [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file-name suffix together with its kind flags. */
struct st_zip_searchorder {
    char suffix[14];    /* "/__init__.pyc" is 13 characters plus the NUL */
    int type;           /* combination of the IS_* flags above */
};

/* Order in which candidate files for a module are looked up in the
   archive: package __init__ files come first, then plain modules; within
   each group byte code (.pyc, .pyo) precedes source (.py).
   initzipimport() patches this table at start-up: the leading '/' of the
   package entries is replaced by SEP, and the .pyc/.pyo entries are
   swapped when running in optimized mode.  The entry with an empty
   suffix terminates the table. */
static struct st_zip_searchorder zip_searchorder[] = {
    { "/__init__.pyc", IS_PACKAGE | IS_BYTECODE },
    { "/__init__.pyo", IS_PACKAGE | IS_BYTECODE },
    { "/__init__.py",  IS_PACKAGE | IS_SOURCE },
    { ".pyc",          IS_BYTECODE },
    { ".pyo",          IS_BYTECODE },
    { ".py",           IS_SOURCE },
    { "",              0 }
};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38 PyObject_HEAD
39 PyObject *archive; /* pathname of the Zip archive */
40 PyObject *prefix; /* file prefix: "a/sub/directory/" */
41 PyObject *files; /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
68 if (!PyArg_ParseTuple(args, "s:zipimporter",
69 &path))
70 return -1;
71
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
76 }
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
81 }
82 strcpy(buf, path);
83
84#ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
88 }
89#endif
90
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000094#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000095 struct stat statbuf;
96 int rv;
97
98 rv = stat(buf, &statbuf);
99 if (rv == 0) {
100 /* it exists */
101 if (S_ISREG(statbuf.st_mode))
102 /* it's a file */
103 path = buf;
104 break;
105 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000106#else
107 if (object_exists(buf)) {
108 /* it exists */
109 if (isfile(buf))
110 /* it's a file */
111 path = buf;
112 break;
113 }
114#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000115 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000116 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 if (prefix != NULL)
118 *prefix = SEP;
119 if (p == NULL)
120 break;
121 *p = '\0';
122 prefix = p;
123 }
124 if (path != NULL) {
125 PyObject *files;
126 files = PyDict_GetItemString(zip_directory_cache, path);
127 if (files == NULL) {
128 files = read_directory(buf);
129 if (files == NULL)
130 return -1;
131 if (PyDict_SetItemString(zip_directory_cache, path,
132 files) != 0)
133 return -1;
134 }
135 else
136 Py_INCREF(files);
137 self->files = files;
138 }
139 else {
140 PyErr_SetString(ZipImportError, "not a Zip file");
141 return -1;
142 }
143
144 if (prefix == NULL)
145 prefix = "";
146 else {
147 prefix++;
148 len = strlen(prefix);
149 if (prefix[len-1] != SEP) {
150 /* add trailing SEP */
151 prefix[len] = SEP;
152 prefix[len + 1] = '\0';
153 }
154 }
155
156 self->archive = PyString_FromString(buf);
157 if (self->archive == NULL)
158 return -1;
159
160 self->prefix = PyString_FromString(prefix);
161 if (self->prefix == NULL)
162 return -1;
163
164 return 0;
165}
166
167/* GC support. */
168static int
169zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
170{
171 ZipImporter *self = (ZipImporter *)obj;
172 int err;
173
174 if (self->files != NULL) {
175 err = visit(self->files, arg);
176 if (err)
177 return err;
178 }
179 return 0;
180}
181
182static void
183zipimporter_dealloc(ZipImporter *self)
184{
185 PyObject_GC_UnTrack(self);
186 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000187 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000188 Py_XDECREF(self->files);
189 self->ob_type->tp_free((PyObject *)self);
190}
191
192static PyObject *
193zipimporter_repr(ZipImporter *self)
194{
195 char buf[500];
196 char *archive = "???";
197 char *prefix = "";
198
199 if (self->archive != NULL && PyString_Check(self->archive))
200 archive = PyString_AsString(self->archive);
201 if (self->prefix != NULL && PyString_Check(self->prefix))
202 prefix = PyString_AsString(self->prefix);
203 if (prefix != NULL && *prefix)
204 PyOS_snprintf(buf, sizeof(buf),
205 "<zipimporter object \"%.300s%c%.150s\">",
206 archive, SEP, prefix);
207 else
208 PyOS_snprintf(buf, sizeof(buf),
209 "<zipimporter object \"%.300s\">",
210 archive);
211 return PyString_FromString(buf);
212}
213
/* Return a pointer to the last dotted component of fullname, i.e.
   fullname.split(".")[-1].  The result points into fullname itself. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
225
226/* Given a (sub)modulename, write the potential file path in the
227 archive (without extension) to the path buffer. Return the
228 length of the resulting string. */
229static int
230make_filename(char *prefix, char *name, char *path)
231{
232 int len;
233 char *p;
234
235 len = strlen(prefix);
236
237 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
238 if (len + strlen(name) + 13 >= MAXPATHLEN) {
239 PyErr_SetString(ZipImportError, "path too long");
240 return -1;
241 }
242
243 strcpy(path, prefix);
244 strcpy(path + len, name);
245 for (p = path + len; *p; p++) {
246 if (*p == '.')
247 *p = SEP;
248 }
249 len += strlen(name);
250 return len;
251}
252
/* Result codes of get_module_info(). */
enum module_info {
    MI_ERROR,       /* an error occurred (exception set) */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found a plain module */
    MI_PACKAGE      /* found a package (__init__ file) */
};
259
260/* Return some information about a module. */
261static enum module_info
262get_module_info(ZipImporter *self, char *fullname)
263{
264 char *subname, path[MAXPATHLEN + 1];
265 int len;
266 struct st_zip_searchorder *zso;
267
268 subname = get_subname(fullname);
269
270 len = make_filename(PyString_AsString(self->prefix), subname, path);
271 if (len < 0)
272 return MI_ERROR;
273
274 for (zso = zip_searchorder; *zso->suffix; zso++) {
275 strcpy(path + len, zso->suffix);
276 if (PyDict_GetItemString(self->files, path) != NULL) {
277 if (zso->type & IS_PACKAGE)
278 return MI_PACKAGE;
279 else
280 return MI_MODULE;
281 }
282 }
283 return MI_NOT_FOUND;
284}
285
286/* Check whether we can satisfy the import of the module named by
287 'fullname'. Return self if we can, None if we can't. */
288static PyObject *
289zipimporter_find_module(PyObject *obj, PyObject *args)
290{
291 ZipImporter *self = (ZipImporter *)obj;
292 PyObject *path = NULL;
293 char *fullname;
294 enum module_info mi;
295
296 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
297 &fullname, &path))
298 return NULL;
299
300 mi = get_module_info(self, fullname);
301 if (mi == MI_ERROR)
302 return NULL;
303 if (mi == MI_NOT_FOUND) {
304 Py_INCREF(Py_None);
305 return Py_None;
306 }
307 Py_INCREF(self);
308 return (PyObject *)self;
309}
310
311/* Load and return the module named by 'fullname'. */
312static PyObject *
313zipimporter_load_module(PyObject *obj, PyObject *args)
314{
315 ZipImporter *self = (ZipImporter *)obj;
316 PyObject *code, *mod, *dict;
317 char *fullname, *modpath;
318 int ispackage;
319
320 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
321 &fullname))
322 return NULL;
323
324 code = get_module_code(self, fullname, &ispackage, &modpath);
325 if (code == NULL)
326 return NULL;
327
328 mod = PyImport_AddModule(fullname);
329 if (mod == NULL) {
330 Py_DECREF(code);
331 return NULL;
332 }
333 dict = PyModule_GetDict(mod);
334
335 /* mod.__loader__ = self */
336 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
337 goto error;
338
339 if (ispackage) {
340 /* add __path__ to the module *before* the code gets
341 executed */
342 PyObject *pkgpath, *fullpath;
343 char *prefix = PyString_AsString(self->prefix);
344 char *subname = get_subname(fullname);
345 int err;
346
347 fullpath = PyString_FromFormat("%s%c%s%s",
348 PyString_AsString(self->archive),
349 SEP,
350 *prefix ? prefix : "",
351 subname);
352 if (fullpath == NULL)
353 goto error;
354
355 pkgpath = Py_BuildValue("[O]", fullpath);
356 Py_DECREF(fullpath);
357 if (pkgpath == NULL)
358 goto error;
359 err = PyDict_SetItemString(dict, "__path__", pkgpath);
360 Py_DECREF(pkgpath);
361 if (err != 0)
362 goto error;
363 }
364 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
365 Py_DECREF(code);
366 if (Py_VerboseFlag)
367 PySys_WriteStderr("import %s # loaded from Zip %s\n",
368 fullname, modpath);
369 return mod;
370error:
371 Py_DECREF(code);
372 Py_DECREF(mod);
373 return NULL;
374}
375
376/* Return a bool signifying whether the module is a package or not. */
377static PyObject *
378zipimporter_is_package(PyObject *obj, PyObject *args)
379{
380 ZipImporter *self = (ZipImporter *)obj;
381 char *fullname;
382 enum module_info mi;
383
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000384 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000385 &fullname))
386 return NULL;
387
388 mi = get_module_info(self, fullname);
389 if (mi == MI_ERROR)
390 return NULL;
391 if (mi == MI_NOT_FOUND) {
392 PyErr_Format(ZipImportError, "can't find module '%.200s'",
393 fullname);
394 return NULL;
395 }
396 return PyBool_FromLong(mi == MI_PACKAGE);
397}
398
399static PyObject *
400zipimporter_get_data(PyObject *obj, PyObject *args)
401{
402 ZipImporter *self = (ZipImporter *)obj;
403 char *path;
404#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000405 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000406#endif
407 PyObject *toc_entry;
408 int len;
409
410 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
411 return NULL;
412
413#ifdef ALTSEP
414 if (strlen(path) >= MAXPATHLEN) {
415 PyErr_SetString(ZipImportError, "path too long");
416 return NULL;
417 }
418 strcpy(buf, path);
419 for (p = buf; *p; p++) {
420 if (*p == ALTSEP)
421 *p = SEP;
422 }
423 path = buf;
424#endif
425 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000426 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000427 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
428 path[len] == SEP) {
429 path = path + len + 1;
430 }
431
432 toc_entry = PyDict_GetItemString(self->files, path);
433 if (toc_entry == NULL) {
434 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
435 path);
436 return NULL;
437 }
438 return get_data(PyString_AsString(self->archive), toc_entry);
439}
440
441static PyObject *
442zipimporter_get_code(PyObject *obj, PyObject *args)
443{
444 ZipImporter *self = (ZipImporter *)obj;
445 char *fullname;
446
447 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
448 return NULL;
449
450 return get_module_code(self, fullname, NULL, NULL);
451}
452
453static PyObject *
454zipimporter_get_source(PyObject *obj, PyObject *args)
455{
456 ZipImporter *self = (ZipImporter *)obj;
457 PyObject *toc_entry;
458 char *fullname, *subname, path[MAXPATHLEN+1];
459 int len;
460 enum module_info mi;
461
462 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
463 return NULL;
464
465 mi = get_module_info(self, fullname);
466 if (mi == MI_ERROR)
467 return NULL;
468 if (mi == MI_NOT_FOUND) {
469 PyErr_Format(ZipImportError, "can't find module '%.200s'",
470 fullname);
471 return NULL;
472 }
473 subname = get_subname(fullname);
474
475 len = make_filename(PyString_AsString(self->prefix), subname, path);
476 if (len < 0)
477 return NULL;
478
479 if (mi == MI_PACKAGE) {
480 path[len] = SEP;
481 strcpy(path + len + 1, "__init__.py");
482 }
483 else
484 strcpy(path + len, ".py");
485
486 toc_entry = PyDict_GetItemString(self->files, path);
487 if (toc_entry != NULL)
488 return get_data(PyString_AsString(self->archive), toc_entry);
489
490 /* we have the module, but no source */
491 Py_INCREF(Py_None);
492 return Py_None;
493}
494
495PyDoc_STRVAR(doc_find_module,
496"find_module(fullname, path=None) -> self or None.\n\
497\n\
498Search for a module specified by 'fullname'. 'fullname' must be the\n\
499fully qualified (dotted) module name. It returns the zipimporter\n\
500instance itself if the module was found, or None if it wasn't.\n\
501The optional 'path' argument is ignored -- it's there for compatibility\n\
502with the importer protocol.");
503
504PyDoc_STRVAR(doc_load_module,
505"load_module(fullname) -> module.\n\
506\n\
507Load the module specified by 'fullname'. 'fullname' must be the\n\
508fully qualified (dotted) module name. It returns the imported\n\
509module, or raises ZipImportError if it wasn't found.");
510
511PyDoc_STRVAR(doc_get_data,
512"get_data(pathname) -> string with file data.\n\
513\n\
514Return the data associated with 'pathname'. Raise IOError if\n\
515the file wasn't found.");
516
517PyDoc_STRVAR(doc_is_package,
518"is_package(fullname) -> bool.\n\
519\n\
520Return True if the module specified by fullname is a package.\n\
521Raise ZipImportError is the module couldn't be found.");
522
523PyDoc_STRVAR(doc_get_code,
524"get_code(fullname) -> code object.\n\
525\n\
526Return the code object for the specified module. Raise ZipImportError\n\
527is the module couldn't be found.");
528
529PyDoc_STRVAR(doc_get_source,
530"get_source(fullname) -> source string.\n\
531\n\
532Return the source code for the specified module. Raise ZipImportError\n\
533is the module couldn't be found, return None if the archive does\n\
534contain the module, but has no source for it.");
535
536static PyMethodDef zipimporter_methods[] = {
537 {"find_module", zipimporter_find_module, METH_VARARGS,
538 doc_find_module},
539 {"load_module", zipimporter_load_module, METH_VARARGS,
540 doc_load_module},
541 {"get_data", zipimporter_get_data, METH_VARARGS,
542 doc_get_data},
543 {"get_code", zipimporter_get_code, METH_VARARGS,
544 doc_get_code},
545 {"get_source", zipimporter_get_source, METH_VARARGS,
546 doc_get_source},
547 {"is_package", zipimporter_is_package, METH_VARARGS,
548 doc_is_package},
549 {NULL, NULL} /* sentinel */
550};
551
552static PyMemberDef zipimporter_members[] = {
553 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
554 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
555 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
556 {NULL}
557};
558
559PyDoc_STRVAR(zipimporter_doc,
560"zipimporter(archivepath) -> zipimporter object\n\
561\n\
562Create a new zipimporter instance. 'archivepath' must be a path to\n\
563a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
564a valid Zip archive.");
565
566#define DEFERRED_ADDRESS(ADDR) 0
567
568static PyTypeObject ZipImporter_Type = {
569 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
570 0,
571 "zipimport.zipimporter",
572 sizeof(ZipImporter),
573 0, /* tp_itemsize */
574 (destructor)zipimporter_dealloc, /* tp_dealloc */
575 0, /* tp_print */
576 0, /* tp_getattr */
577 0, /* tp_setattr */
578 0, /* tp_compare */
579 (reprfunc)zipimporter_repr, /* tp_repr */
580 0, /* tp_as_number */
581 0, /* tp_as_sequence */
582 0, /* tp_as_mapping */
583 0, /* tp_hash */
584 0, /* tp_call */
585 0, /* tp_str */
586 PyObject_GenericGetAttr, /* tp_getattro */
587 0, /* tp_setattro */
588 0, /* tp_as_buffer */
589 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
590 Py_TPFLAGS_HAVE_GC, /* tp_flags */
591 zipimporter_doc, /* tp_doc */
592 zipimporter_traverse, /* tp_traverse */
593 0, /* tp_clear */
594 0, /* tp_richcompare */
595 0, /* tp_weaklistoffset */
596 0, /* tp_iter */
597 0, /* tp_iternext */
598 zipimporter_methods, /* tp_methods */
599 zipimporter_members, /* tp_members */
600 0, /* tp_getset */
601 0, /* tp_base */
602 0, /* tp_dict */
603 0, /* tp_descr_get */
604 0, /* tp_descr_set */
605 0, /* tp_dictoffset */
606 (initproc)zipimporter_init, /* tp_init */
607 PyType_GenericAlloc, /* tp_alloc */
608 PyType_GenericNew, /* tp_new */
609 PyObject_GC_Del, /* tp_free */
610};
611
612
613/* implementation */
614
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  The 32-bit value is interpreted as
   signed (two's complement), so e.g. FF FF FF FF yields -1 regardless of
   the width of long.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that shifting the top
   byte by 24 can never overflow a signed type. */
static long
get_long(unsigned char *buf) {
    unsigned long u;

    u = (unsigned long)buf[0];
    u |= (unsigned long)buf[1] << 8;
    u |= (unsigned long)buf[2] << 16;
    u |= (unsigned long)buf[3] << 24;

    if (u & 0x80000000UL) {
        /* sign bit set: value is u - 2**32, computed without leaving
           the range of a 32-bit long */
        return -(long)(0xFFFFFFFFUL - u) - 1;
    }
    return (long)u;
}
631
632/*
633 read_directory(archive) -> files dict (new reference)
634
635 Given a path to a Zip archive, build a dict, mapping file names
636 (local to the archive, using SEP as a separator) to toc entries.
637
638 A toc_entry is a tuple:
639
640 (compress, # compression kind; 0 for uncompressed
641 data_size, # size of compressed data on disk
642 file_size, # size of decompressed data
643 file_offset, # offset of file header from start of archive
644 time, # mod time of file (in dos format)
645 date, # mod data of file (in dos format)
646 crc, # crc checksum of the data
647 )
648
649 Directories can be recognized by the trailing SEP in the name,
650 data_size and file_offset are 0.
651*/
652static PyObject *
653read_directory(char *archive)
654{
655 PyObject *files = NULL;
656 FILE *fp;
657 long compress, crc, data_size, file_size, file_offset, date, time;
658 long header_offset, name_size, header_size, header_end;
659 long i, l, length, count;
660 char path[MAXPATHLEN + 5];
661 char name[MAXPATHLEN + 5];
662 char *p, endof_central_dir[22];
663
664 if (strlen(archive) > MAXPATHLEN) {
665 PyErr_SetString(PyExc_OverflowError,
666 "Zip path name is too long");
667 return NULL;
668 }
669 strcpy(path, archive);
670
671 fp = fopen(archive, "rb");
672 if (fp == NULL) {
673 PyErr_Format(ZipImportError, "can't open Zip file: "
674 "'%.200s'", archive);
675 return NULL;
676 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000677 fseek(fp, -22, SEEK_END);
Just van Rossum52e14d62002-12-30 22:08:05 +0000678 header_end = ftell(fp);
679 if (fread(endof_central_dir, 1, 22, fp) != 22) {
680 fclose(fp);
681 PyErr_Format(ZipImportError, "can't read Zip file: "
682 "'%.200s'", archive);
683 return NULL;
684 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000685 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000686 /* Bad: End of Central Dir signature */
687 fclose(fp);
688 PyErr_Format(ZipImportError, "not a Zip file: "
689 "'%.200s'", archive);
690 return NULL;
691 }
692
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000693 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Just van Rossum52e14d62002-12-30 22:08:05 +0000694
695 files = PyDict_New();
696 if (files == NULL)
697 goto error;
698
699 length = (long)strlen(path);
700 path[length] = SEP;
701
702 /* Start of Central Directory */
703 count = 0;
704 for (;;) {
705 PyObject *t;
706 int err;
707
708 fseek(fp, header_offset, 0); /* Start of file header */
709 l = PyMarshal_ReadLongFromFile(fp);
710 if (l != 0x02014B50)
711 break; /* Bad: Central Dir File Header */
712 fseek(fp, header_offset + 10, 0);
713 compress = PyMarshal_ReadShortFromFile(fp);
714 time = PyMarshal_ReadShortFromFile(fp);
715 date = PyMarshal_ReadShortFromFile(fp);
716 crc = PyMarshal_ReadLongFromFile(fp);
717 data_size = PyMarshal_ReadLongFromFile(fp);
718 file_size = PyMarshal_ReadLongFromFile(fp);
719 name_size = PyMarshal_ReadShortFromFile(fp);
720 header_size = 46 + name_size +
721 PyMarshal_ReadShortFromFile(fp) +
722 PyMarshal_ReadShortFromFile(fp);
723 fseek(fp, header_offset + 42, 0);
724 file_offset = PyMarshal_ReadLongFromFile(fp);
725 if (name_size > MAXPATHLEN)
726 name_size = MAXPATHLEN;
727
728 p = name;
729 for (i = 0; i < name_size; i++) {
730 *p = (char)getc(fp);
731 if (*p == '/')
732 *p = SEP;
733 p++;
734 }
735 *p = 0; /* Add terminating null byte */
736 header_offset += header_size;
737
738 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
739
740 t = Py_BuildValue("siiiiiii", path, compress, data_size,
741 file_size, file_offset, time, date, crc);
742 if (t == NULL)
743 goto error;
744 err = PyDict_SetItemString(files, name, t);
745 Py_DECREF(t);
746 if (err != 0)
747 goto error;
748 count++;
749 }
750 fclose(fp);
751 if (Py_VerboseFlag)
752 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
753 count, archive);
754 return files;
755error:
756 fclose(fp);
757 Py_XDECREF(files);
758 return NULL;
759}
760
761/* Return the zlib.decompress function object, or NULL if zlib couldn't
762 be imported. The function is cached when found, so subsequent calls
763 don't import zlib again. Returns a *borrowed* reference.
764 XXX This makes zlib.decompress immortal. */
765static PyObject *
766get_decompress_func(void)
767{
768 static PyObject *decompress = NULL;
769
770 if (decompress == NULL) {
771 PyObject *zlib;
772 static int importing_zlib = 0;
773
774 if (importing_zlib != 0)
775 /* Someone has a zlib.py[co] in their Zip file;
776 let's avoid a stack overflow. */
777 return NULL;
778 importing_zlib = 1;
779 zlib = PyImport_ImportModule("zlib"); /* import zlib */
780 importing_zlib = 0;
781 if (zlib != NULL) {
782 decompress = PyObject_GetAttrString(zlib,
783 "decompress");
784 Py_DECREF(zlib);
785 }
786 else
787 PyErr_Clear();
788 if (Py_VerboseFlag)
789 PySys_WriteStderr("# zipimport: zlib %s\n",
790 zlib != NULL ? "available": "UNAVAILABLE");
791 }
792 return decompress;
793}
794
795/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
796 data as a new reference. */
797static PyObject *
798get_data(char *archive, PyObject *toc_entry)
799{
800 PyObject *raw_data, *data = NULL, *decompress;
801 char *buf;
802 FILE *fp;
803 int err, bytes_read = 0;
804 long l;
805 char *datapath;
806 long compress, data_size, file_size, file_offset;
807 long time, date, crc;
808
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000809 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000810 &data_size, &file_size, &file_offset, &time,
811 &date, &crc)) {
812 return NULL;
813 }
814
815 fp = fopen(archive, "rb");
816 if (!fp) {
817 PyErr_Format(PyExc_IOError,
818 "zipimport: can not open file %s", archive);
819 return NULL;
820 }
821
822 /* Check to make sure the local file header is correct */
823 fseek(fp, file_offset, 0);
824 l = PyMarshal_ReadLongFromFile(fp);
825 if (l != 0x04034B50) {
826 /* Bad: Local File Header */
827 PyErr_Format(ZipImportError,
828 "bad local file header in %s",
829 archive);
830 fclose(fp);
831 return NULL;
832 }
833 fseek(fp, file_offset + 26, 0);
834 l = 30 + PyMarshal_ReadShortFromFile(fp) +
835 PyMarshal_ReadShortFromFile(fp); /* local header size */
836 file_offset += l; /* Start of file data */
837
838 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
839 data_size : data_size + 1);
840 if (raw_data == NULL) {
841 fclose(fp);
842 return NULL;
843 }
844 buf = PyString_AsString(raw_data);
845
846 err = fseek(fp, file_offset, 0);
847 if (err == 0)
848 bytes_read = fread(buf, 1, data_size, fp);
849 fclose(fp);
850 if (err || bytes_read != data_size) {
851 PyErr_SetString(PyExc_IOError,
852 "zipimport: can't read data");
853 Py_DECREF(raw_data);
854 return NULL;
855 }
856
857 if (compress != 0) {
858 buf[data_size] = 'Z'; /* saw this in zipfile.py */
859 data_size++;
860 }
861 buf[data_size] = '\0';
862
863 if (compress == 0) /* data is not compressed */
864 return raw_data;
865
866 /* Decompress with zlib */
867 decompress = get_decompress_func();
868 if (decompress == NULL) {
869 PyErr_SetString(ZipImportError,
870 "can't decompress data; "
871 "zlib not available");
872 goto error;
873 }
874 data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
875error:
876 Py_DECREF(raw_data);
877 return data;
878}
879
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime only
   stores even seconds): two time stamps count as equal when they differ
   by at most one second.  Returns 1 if "equal", 0 otherwise.

   The difference is formed larger-minus-smaller so that the result is
   also right on platforms where time_t is an unsigned type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? (t1 - t2) : (t2 - t1);

    return d <= 1;
}
892
893/* Given the contents of a .py[co] file in a buffer, unmarshal the data
894 and return the code object. Return None if it the magic word doesn't
895 match (we do this instead of raising an exception as we fall back
896 to .py if available and we don't want to mask other errors).
897 Returns a new reference. */
898static PyObject *
899unmarshal_code(char *pathname, PyObject *data, time_t mtime)
900{
901 PyObject *code;
902 char *buf = PyString_AsString(data);
903 int size = PyString_Size(data);
904
905 if (size <= 9) {
906 PyErr_SetString(ZipImportError,
907 "bad pyc data");
908 return NULL;
909 }
910
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000911 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000912 if (Py_VerboseFlag)
913 PySys_WriteStderr("# %s has bad magic\n",
914 pathname);
915 Py_INCREF(Py_None);
916 return Py_None; /* signal caller to try alternative */
917 }
918
Just van Rossum9a3129c2003-01-03 11:18:56 +0000919 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
920 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000921 if (Py_VerboseFlag)
922 PySys_WriteStderr("# %s has bad mtime\n",
923 pathname);
924 Py_INCREF(Py_None);
925 return Py_None; /* signal caller to try alternative */
926 }
927
928 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
929 if (code == NULL)
930 return NULL;
931 if (!PyCode_Check(code)) {
932 Py_DECREF(code);
933 PyErr_Format(PyExc_TypeError,
934 "compiled module %.200s is not a code object",
935 pathname);
936 return NULL;
937 }
938 return code;
939}
940
941/* Replace any occurances of "\r\n?" in the input string with "\n".
942 This converts DOS and Mac line endings to Unix line endings.
943 Also append a trailing "\n" to be compatible with
944 PyParser_SimpleParseFile(). Returns a new reference. */
945static PyObject *
946normalize_line_endings(PyObject *source)
947{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000948 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000949 PyObject *fixed_source;
950
Just van Rossum9a3129c2003-01-03 11:18:56 +0000951 /* one char extra for trailing \n and one for terminating \0 */
952 buf = PyMem_Malloc(PyString_Size(source) + 2);
953 if (buf == NULL) {
954 PyErr_SetString(PyExc_MemoryError,
955 "zipimport: no memory to allocate "
956 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000957 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000959 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000960 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000961 if (*p == '\r') {
962 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000963 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 }
966 else
967 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 }
969 *q++ = '\n'; /* add trailing \n */
970 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 fixed_source = PyString_FromString(buf);
972 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 return fixed_source;
974}
975
976/* Given a string buffer containing Python source code, compile it
977 return and return a code object as a new reference. */
978static PyObject *
979compile_source(char *pathname, PyObject *source)
980{
981 PyObject *code, *fixed_source;
982
983 fixed_source = normalize_line_endings(source);
984 if (fixed_source == NULL)
985 return NULL;
986
987 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
988 Py_file_input);
989 Py_DECREF(fixed_source);
990 return code;
991}
992
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: seconds/2 in bits 0-4, minutes in bits 5-10, hours in 11-15.
   dosdate: day in bits 0-4, month (1-12) in bits 5-8, years since 1980
            in bits 9-15.
   The fields are taken as local time and converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* start from an all-zero struct so that no member is handed to
       mktime() uninitialized */
    struct tm stm = {0};

    stm.tm_sec = (dostime & 0x1f) * 2;
    stm.tm_min = (dostime >> 5) & 0x3f;
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_mday = dosdate & 0x1f;
    stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1;       /* tm_mon is 0-based */
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;     /* years since 1900 */
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1010
1011/* Given a path to a .pyc or .pyo file in the archive, return the
1012 modifictaion time of the matching .py file, or 0 if no source
1013 is available. */
1014static time_t
1015get_mtime_of_source(ZipImporter *self, char *path)
1016{
1017 PyObject *toc_entry;
1018 time_t mtime = 0;
1019 int lastchar = strlen(path) - 1;
1020 char savechar = path[lastchar];
1021 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1022 toc_entry = PyDict_GetItemString(self->files, path);
1023 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1024 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001025 /* fetch the time stamp of the .py file for comparison
1026 with an embedded pyc time stamp */
1027 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001028 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1029 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1030 mtime = parse_dostime(time, date);
1031 }
1032 path[lastchar] = savechar;
1033 return mtime;
1034}
1035
1036/* Return the code object for the module named by 'fullname' from the
1037 Zip archive as a new reference. */
1038static PyObject *
1039get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1040 time_t mtime, PyObject *toc_entry)
1041{
1042 PyObject *data, *code;
1043 char *modpath;
1044 char *archive = PyString_AsString(self->archive);
1045
1046 if (archive == NULL)
1047 return NULL;
1048
1049 data = get_data(archive, toc_entry);
1050 if (data == NULL)
1051 return NULL;
1052
1053 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1054
1055 if (isbytecode) {
1056 code = unmarshal_code(modpath, data, mtime);
1057 }
1058 else {
1059 code = compile_source(modpath, data);
1060 }
1061 Py_DECREF(data);
1062 return code;
1063}
1064
1065/* Get the code object assoiciated with the module specified by
1066 'fullname'. */
1067static PyObject *
1068get_module_code(ZipImporter *self, char *fullname,
1069 int *p_ispackage, char **p_modpath)
1070{
1071 PyObject *toc_entry;
1072 char *subname, path[MAXPATHLEN + 1];
1073 int len;
1074 struct st_zip_searchorder *zso;
1075
1076 subname = get_subname(fullname);
1077
1078 len = make_filename(PyString_AsString(self->prefix), subname, path);
1079 if (len < 0)
1080 return NULL;
1081
1082 for (zso = zip_searchorder; *zso->suffix; zso++) {
1083 PyObject *code = NULL;
1084
1085 strcpy(path + len, zso->suffix);
1086 if (Py_VerboseFlag > 1)
1087 PySys_WriteStderr("# trying %s%c%s\n",
1088 PyString_AsString(self->archive),
1089 SEP, path);
1090 toc_entry = PyDict_GetItemString(self->files, path);
1091 if (toc_entry != NULL) {
1092 time_t mtime = 0;
1093 int ispackage = zso->type & IS_PACKAGE;
1094 int isbytecode = zso->type & IS_BYTECODE;
1095
1096 if (isbytecode)
1097 mtime = get_mtime_of_source(self, path);
1098 if (p_ispackage != NULL)
1099 *p_ispackage = ispackage;
1100 code = get_code_from_data(self, ispackage,
1101 isbytecode, mtime,
1102 toc_entry);
1103 if (code == Py_None) {
1104 /* bad magic number or non-matching mtime
1105 in byte code, try next */
1106 Py_DECREF(code);
1107 continue;
1108 }
1109 if (code != NULL && p_modpath != NULL)
1110 *p_modpath = PyString_AsString(
1111 PyTuple_GetItem(toc_entry, 0));
1112 return code;
1113 }
1114 }
1115 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1116 return NULL;
1117}
1118
1119
1120/* Module init */
1121
1122PyDoc_STRVAR(zipimport_doc,
1123"zipimport provides support for importing Python modules from Zip archives.\n\
1124\n\
1125This module exports three objects:\n\
1126- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1127- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1128 subclass of ImportError, so it can be caught as ImportError, too.\n\
1129- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1130 info dicts, as used in zipimporter._files.\n\
1131\n\
1132It is usually not needed to use the zipimport module explicitly; it is\n\
1133used by the builtin import mechanism for sys.path items that are paths\n\
1134to Zip archives.");
1135
1136PyMODINIT_FUNC
1137initzipimport(void)
1138{
1139 PyObject *mod;
1140
1141 if (PyType_Ready(&ZipImporter_Type) < 0)
1142 return;
1143
1144 /* Correct directory separator */
1145 zip_searchorder[0].suffix[0] = SEP;
1146 zip_searchorder[1].suffix[0] = SEP;
1147 zip_searchorder[2].suffix[0] = SEP;
1148 if (Py_OptimizeFlag) {
1149 /* Reverse *.pyc and *.pyo */
1150 struct st_zip_searchorder tmp;
1151 tmp = zip_searchorder[0];
1152 zip_searchorder[0] = zip_searchorder[1];
1153 zip_searchorder[1] = tmp;
1154 tmp = zip_searchorder[3];
1155 zip_searchorder[3] = zip_searchorder[4];
1156 zip_searchorder[4] = tmp;
1157 }
1158
1159 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1160 NULL, PYTHON_API_VERSION);
1161
1162 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1163 PyExc_ImportError, NULL);
1164 if (ZipImportError == NULL)
1165 return;
1166
1167 Py_INCREF(ZipImportError);
1168 if (PyModule_AddObject(mod, "ZipImportError",
1169 ZipImportError) < 0)
1170 return;
1171
1172 Py_INCREF(&ZipImporter_Type);
1173 if (PyModule_AddObject(mod, "zipimporter",
1174 (PyObject *)&ZipImporter_Type) < 0)
1175 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001176
Just van Rossum52e14d62002-12-30 22:08:05 +00001177 zip_directory_cache = PyDict_New();
1178 if (zip_directory_cache == NULL)
1179 return;
1180 Py_INCREF(zip_directory_cache);
1181 if (PyModule_AddObject(mod, "_zip_directory_cache",
1182 zip_directory_cache) < 0)
1183 return;
1184}