blob: b229088892440bbf947e186ff04ea466974f8662 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Flag bits describing a zip_searchorder entry; they may be OR-ed. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One candidate file name suffix together with its IS_* flags. */
struct st_zip_searchorder {
    char suffix[14];    /* longest entry, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* combination of the IS_* flags above */
};
17
18/* zip_searchorder defines how we search for a module in the Zip
19 archive: we first search for a package __init__, then for
20 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
21 are swapped by initzipimport() if we run in optimized mode. Also,
22 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000023static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000024 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
26 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
27 {".pyc", IS_BYTECODE},
28 {".pyo", IS_BYTECODE},
29 {".py", IS_SOURCE},
30 {"", 0}
31};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38 PyObject_HEAD
39 PyObject *archive; /* pathname of the Zip archive */
40 PyObject *prefix; /* file prefix: "a/sub/directory/" */
41 PyObject *files; /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
68 if (!PyArg_ParseTuple(args, "s:zipimporter",
69 &path))
70 return -1;
71
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
76 }
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
81 }
82 strcpy(buf, path);
83
84#ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
88 }
89#endif
90
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000094#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000095 struct stat statbuf;
96 int rv;
97
98 rv = stat(buf, &statbuf);
99 if (rv == 0) {
100 /* it exists */
101 if (S_ISREG(statbuf.st_mode))
102 /* it's a file */
103 path = buf;
104 break;
105 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000106#else
107 if (object_exists(buf)) {
108 /* it exists */
109 if (isfile(buf))
110 /* it's a file */
111 path = buf;
112 break;
113 }
114#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000115 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000116 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 if (prefix != NULL)
118 *prefix = SEP;
119 if (p == NULL)
120 break;
121 *p = '\0';
122 prefix = p;
123 }
124 if (path != NULL) {
125 PyObject *files;
126 files = PyDict_GetItemString(zip_directory_cache, path);
127 if (files == NULL) {
128 files = read_directory(buf);
129 if (files == NULL)
130 return -1;
131 if (PyDict_SetItemString(zip_directory_cache, path,
132 files) != 0)
133 return -1;
134 }
135 else
136 Py_INCREF(files);
137 self->files = files;
138 }
139 else {
140 PyErr_SetString(ZipImportError, "not a Zip file");
141 return -1;
142 }
143
144 if (prefix == NULL)
145 prefix = "";
146 else {
147 prefix++;
148 len = strlen(prefix);
149 if (prefix[len-1] != SEP) {
150 /* add trailing SEP */
151 prefix[len] = SEP;
152 prefix[len + 1] = '\0';
153 }
154 }
155
156 self->archive = PyString_FromString(buf);
157 if (self->archive == NULL)
158 return -1;
159
160 self->prefix = PyString_FromString(prefix);
161 if (self->prefix == NULL)
162 return -1;
163
164 return 0;
165}
166
167/* GC support. */
168static int
169zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
170{
171 ZipImporter *self = (ZipImporter *)obj;
172 int err;
173
174 if (self->files != NULL) {
175 err = visit(self->files, arg);
176 if (err)
177 return err;
178 }
179 return 0;
180}
181
182static void
183zipimporter_dealloc(ZipImporter *self)
184{
185 PyObject_GC_UnTrack(self);
186 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000187 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000188 Py_XDECREF(self->files);
189 self->ob_type->tp_free((PyObject *)self);
190}
191
192static PyObject *
193zipimporter_repr(ZipImporter *self)
194{
195 char buf[500];
196 char *archive = "???";
197 char *prefix = "";
198
199 if (self->archive != NULL && PyString_Check(self->archive))
200 archive = PyString_AsString(self->archive);
201 if (self->prefix != NULL && PyString_Check(self->prefix))
202 prefix = PyString_AsString(self->prefix);
203 if (prefix != NULL && *prefix)
204 PyOS_snprintf(buf, sizeof(buf),
205 "<zipimporter object \"%.300s%c%.150s\">",
206 archive, SEP, prefix);
207 else
208 PyOS_snprintf(buf, sizeof(buf),
209 "<zipimporter object \"%.300s\">",
210 archive);
211 return PyString_FromString(buf);
212}
213
/* Return the last component of a dotted module name, i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   nothing is allocated. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
225
226/* Given a (sub)modulename, write the potential file path in the
227 archive (without extension) to the path buffer. Return the
228 length of the resulting string. */
229static int
230make_filename(char *prefix, char *name, char *path)
231{
232 int len;
233 char *p;
234
235 len = strlen(prefix);
236
237 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
238 if (len + strlen(name) + 13 >= MAXPATHLEN) {
239 PyErr_SetString(ZipImportError, "path too long");
240 return -1;
241 }
242
243 strcpy(path, prefix);
244 strcpy(path + len, name);
245 for (p = path + len; *p; p++) {
246 if (*p == '.')
247 *p = SEP;
248 }
249 len += strlen(name);
250 return len;
251}
252
/* Result of looking up a module in the archive. */
enum module_info {
    MI_ERROR,       /* the lookup itself failed */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a non-package module */
    MI_PACKAGE      /* found as a package */
};
259
260/* Return some information about a module. */
261static enum module_info
262get_module_info(ZipImporter *self, char *fullname)
263{
264 char *subname, path[MAXPATHLEN + 1];
265 int len;
266 struct st_zip_searchorder *zso;
267
268 subname = get_subname(fullname);
269
270 len = make_filename(PyString_AsString(self->prefix), subname, path);
271 if (len < 0)
272 return MI_ERROR;
273
274 for (zso = zip_searchorder; *zso->suffix; zso++) {
275 strcpy(path + len, zso->suffix);
276 if (PyDict_GetItemString(self->files, path) != NULL) {
277 if (zso->type & IS_PACKAGE)
278 return MI_PACKAGE;
279 else
280 return MI_MODULE;
281 }
282 }
283 return MI_NOT_FOUND;
284}
285
286/* Check whether we can satisfy the import of the module named by
287 'fullname'. Return self if we can, None if we can't. */
288static PyObject *
289zipimporter_find_module(PyObject *obj, PyObject *args)
290{
291 ZipImporter *self = (ZipImporter *)obj;
292 PyObject *path = NULL;
293 char *fullname;
294 enum module_info mi;
295
296 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
297 &fullname, &path))
298 return NULL;
299
300 mi = get_module_info(self, fullname);
301 if (mi == MI_ERROR)
302 return NULL;
303 if (mi == MI_NOT_FOUND) {
304 Py_INCREF(Py_None);
305 return Py_None;
306 }
307 Py_INCREF(self);
308 return (PyObject *)self;
309}
310
311/* Load and return the module named by 'fullname'. */
312static PyObject *
313zipimporter_load_module(PyObject *obj, PyObject *args)
314{
315 ZipImporter *self = (ZipImporter *)obj;
316 PyObject *code, *mod, *dict;
317 char *fullname, *modpath;
318 int ispackage;
319
320 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
321 &fullname))
322 return NULL;
323
324 code = get_module_code(self, fullname, &ispackage, &modpath);
325 if (code == NULL)
326 return NULL;
327
328 mod = PyImport_AddModule(fullname);
329 if (mod == NULL) {
330 Py_DECREF(code);
331 return NULL;
332 }
333 dict = PyModule_GetDict(mod);
334
335 /* mod.__loader__ = self */
336 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
337 goto error;
338
339 if (ispackage) {
340 /* add __path__ to the module *before* the code gets
341 executed */
342 PyObject *pkgpath, *fullpath;
343 char *prefix = PyString_AsString(self->prefix);
344 char *subname = get_subname(fullname);
345 int err;
346
347 fullpath = PyString_FromFormat("%s%c%s%s",
348 PyString_AsString(self->archive),
349 SEP,
350 *prefix ? prefix : "",
351 subname);
352 if (fullpath == NULL)
353 goto error;
354
355 pkgpath = Py_BuildValue("[O]", fullpath);
356 Py_DECREF(fullpath);
357 if (pkgpath == NULL)
358 goto error;
359 err = PyDict_SetItemString(dict, "__path__", pkgpath);
360 Py_DECREF(pkgpath);
361 if (err != 0)
362 goto error;
363 }
364 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
365 Py_DECREF(code);
366 if (Py_VerboseFlag)
367 PySys_WriteStderr("import %s # loaded from Zip %s\n",
368 fullname, modpath);
369 return mod;
370error:
371 Py_DECREF(code);
372 Py_DECREF(mod);
373 return NULL;
374}
375
376/* Return a bool signifying whether the module is a package or not. */
377static PyObject *
378zipimporter_is_package(PyObject *obj, PyObject *args)
379{
380 ZipImporter *self = (ZipImporter *)obj;
381 char *fullname;
382 enum module_info mi;
383
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000384 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000385 &fullname))
386 return NULL;
387
388 mi = get_module_info(self, fullname);
389 if (mi == MI_ERROR)
390 return NULL;
391 if (mi == MI_NOT_FOUND) {
392 PyErr_Format(ZipImportError, "can't find module '%.200s'",
393 fullname);
394 return NULL;
395 }
396 return PyBool_FromLong(mi == MI_PACKAGE);
397}
398
399static PyObject *
400zipimporter_get_data(PyObject *obj, PyObject *args)
401{
402 ZipImporter *self = (ZipImporter *)obj;
403 char *path;
404#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000405 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000406#endif
407 PyObject *toc_entry;
408 int len;
409
410 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
411 return NULL;
412
413#ifdef ALTSEP
414 if (strlen(path) >= MAXPATHLEN) {
415 PyErr_SetString(ZipImportError, "path too long");
416 return NULL;
417 }
418 strcpy(buf, path);
419 for (p = buf; *p; p++) {
420 if (*p == ALTSEP)
421 *p = SEP;
422 }
423 path = buf;
424#endif
425 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000426 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000427 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
428 path[len] == SEP) {
429 path = path + len + 1;
430 }
431
432 toc_entry = PyDict_GetItemString(self->files, path);
433 if (toc_entry == NULL) {
434 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
435 path);
436 return NULL;
437 }
438 return get_data(PyString_AsString(self->archive), toc_entry);
439}
440
441static PyObject *
442zipimporter_get_code(PyObject *obj, PyObject *args)
443{
444 ZipImporter *self = (ZipImporter *)obj;
445 char *fullname;
446
447 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
448 return NULL;
449
450 return get_module_code(self, fullname, NULL, NULL);
451}
452
453static PyObject *
454zipimporter_get_source(PyObject *obj, PyObject *args)
455{
456 ZipImporter *self = (ZipImporter *)obj;
457 PyObject *toc_entry;
458 char *fullname, *subname, path[MAXPATHLEN+1];
459 int len;
460 enum module_info mi;
461
462 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
463 return NULL;
464
465 mi = get_module_info(self, fullname);
466 if (mi == MI_ERROR)
467 return NULL;
468 if (mi == MI_NOT_FOUND) {
469 PyErr_Format(ZipImportError, "can't find module '%.200s'",
470 fullname);
471 return NULL;
472 }
473 subname = get_subname(fullname);
474
475 len = make_filename(PyString_AsString(self->prefix), subname, path);
476 if (len < 0)
477 return NULL;
478
479 if (mi == MI_PACKAGE) {
480 path[len] = SEP;
481 strcpy(path + len + 1, "__init__.py");
482 }
483 else
484 strcpy(path + len, ".py");
485
486 toc_entry = PyDict_GetItemString(self->files, path);
487 if (toc_entry != NULL)
488 return get_data(PyString_AsString(self->archive), toc_entry);
489
490 /* we have the module, but no source */
491 Py_INCREF(Py_None);
492 return Py_None;
493}
494
495PyDoc_STRVAR(doc_find_module,
496"find_module(fullname, path=None) -> self or None.\n\
497\n\
498Search for a module specified by 'fullname'. 'fullname' must be the\n\
499fully qualified (dotted) module name. It returns the zipimporter\n\
500instance itself if the module was found, or None if it wasn't.\n\
501The optional 'path' argument is ignored -- it's there for compatibility\n\
502with the importer protocol.");
503
504PyDoc_STRVAR(doc_load_module,
505"load_module(fullname) -> module.\n\
506\n\
507Load the module specified by 'fullname'. 'fullname' must be the\n\
508fully qualified (dotted) module name. It returns the imported\n\
509module, or raises ZipImportError if it wasn't found.");
510
511PyDoc_STRVAR(doc_get_data,
512"get_data(pathname) -> string with file data.\n\
513\n\
514Return the data associated with 'pathname'. Raise IOError if\n\
515the file wasn't found.");
516
517PyDoc_STRVAR(doc_is_package,
518"is_package(fullname) -> bool.\n\
519\n\
520Return True if the module specified by fullname is a package.\n\
521Raise ZipImportError is the module couldn't be found.");
522
523PyDoc_STRVAR(doc_get_code,
524"get_code(fullname) -> code object.\n\
525\n\
526Return the code object for the specified module. Raise ZipImportError\n\
527is the module couldn't be found.");
528
529PyDoc_STRVAR(doc_get_source,
530"get_source(fullname) -> source string.\n\
531\n\
532Return the source code for the specified module. Raise ZipImportError\n\
533is the module couldn't be found, return None if the archive does\n\
534contain the module, but has no source for it.");
535
536static PyMethodDef zipimporter_methods[] = {
537 {"find_module", zipimporter_find_module, METH_VARARGS,
538 doc_find_module},
539 {"load_module", zipimporter_load_module, METH_VARARGS,
540 doc_load_module},
541 {"get_data", zipimporter_get_data, METH_VARARGS,
542 doc_get_data},
543 {"get_code", zipimporter_get_code, METH_VARARGS,
544 doc_get_code},
545 {"get_source", zipimporter_get_source, METH_VARARGS,
546 doc_get_source},
547 {"is_package", zipimporter_is_package, METH_VARARGS,
548 doc_is_package},
549 {NULL, NULL} /* sentinel */
550};
551
552static PyMemberDef zipimporter_members[] = {
553 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
554 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
555 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
556 {NULL}
557};
558
559PyDoc_STRVAR(zipimporter_doc,
560"zipimporter(archivepath) -> zipimporter object\n\
561\n\
562Create a new zipimporter instance. 'archivepath' must be a path to\n\
563a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
564a valid Zip archive.");
565
566#define DEFERRED_ADDRESS(ADDR) 0
567
568static PyTypeObject ZipImporter_Type = {
569 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
570 0,
571 "zipimport.zipimporter",
572 sizeof(ZipImporter),
573 0, /* tp_itemsize */
574 (destructor)zipimporter_dealloc, /* tp_dealloc */
575 0, /* tp_print */
576 0, /* tp_getattr */
577 0, /* tp_setattr */
578 0, /* tp_compare */
579 (reprfunc)zipimporter_repr, /* tp_repr */
580 0, /* tp_as_number */
581 0, /* tp_as_sequence */
582 0, /* tp_as_mapping */
583 0, /* tp_hash */
584 0, /* tp_call */
585 0, /* tp_str */
586 PyObject_GenericGetAttr, /* tp_getattro */
587 0, /* tp_setattro */
588 0, /* tp_as_buffer */
589 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
590 Py_TPFLAGS_HAVE_GC, /* tp_flags */
591 zipimporter_doc, /* tp_doc */
592 zipimporter_traverse, /* tp_traverse */
593 0, /* tp_clear */
594 0, /* tp_richcompare */
595 0, /* tp_weaklistoffset */
596 0, /* tp_iter */
597 0, /* tp_iternext */
598 zipimporter_methods, /* tp_methods */
599 zipimporter_members, /* tp_members */
600 0, /* tp_getset */
601 0, /* tp_base */
602 0, /* tp_dict */
603 0, /* tp_descr_get */
604 0, /* tp_descr_set */
605 0, /* tp_dictoffset */
606 (initproc)zipimporter_init, /* tp_init */
607 PyType_GenericAlloc, /* tp_alloc */
608 PyType_GenericNew, /* tp_new */
609 PyObject_GC_Del, /* tp_free */
610};
611
612
613/* implementation */
614
/* Decode the first 4 bytes of 'buf' as a little-endian 32-bit value and
   return it as a long.  Where SIZEOF_LONG is greater than 4, bit 31 is
   propagated into the upper bits, so the result is the signed
   interpretation of the 32-bit value.  This partially reimplements
   marshal.c:r_long(). */
static long
get_long(unsigned char *buf) {
    long value = 0;
    int i;

    /* fold the bytes in from most to least significant */
    for (i = 3; i >= 0; i--)
        value = (value << 8) | buf[i];
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
631
632/*
633 read_directory(archive) -> files dict (new reference)
634
635 Given a path to a Zip archive, build a dict, mapping file names
636 (local to the archive, using SEP as a separator) to toc entries.
637
638 A toc_entry is a tuple:
639
640 (compress, # compression kind; 0 for uncompressed
641 data_size, # size of compressed data on disk
642 file_size, # size of decompressed data
643 file_offset, # offset of file header from start of archive
644 time, # mod time of file (in dos format)
645 date, # mod data of file (in dos format)
646 crc, # crc checksum of the data
647 )
648
649 Directories can be recognized by the trailing SEP in the name,
650 data_size and file_offset are 0.
651*/
652static PyObject *
653read_directory(char *archive)
654{
655 PyObject *files = NULL;
656 FILE *fp;
657 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000658 long header_offset, name_size, header_size, header_position;
Just van Rossum52e14d62002-12-30 22:08:05 +0000659 long i, l, length, count;
660 char path[MAXPATHLEN + 5];
661 char name[MAXPATHLEN + 5];
662 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000663 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000664
665 if (strlen(archive) > MAXPATHLEN) {
666 PyErr_SetString(PyExc_OverflowError,
667 "Zip path name is too long");
668 return NULL;
669 }
670 strcpy(path, archive);
671
672 fp = fopen(archive, "rb");
673 if (fp == NULL) {
674 PyErr_Format(ZipImportError, "can't open Zip file: "
675 "'%.200s'", archive);
676 return NULL;
677 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000678 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000679 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000680 if (fread(endof_central_dir, 1, 22, fp) != 22) {
681 fclose(fp);
682 PyErr_Format(ZipImportError, "can't read Zip file: "
683 "'%.200s'", archive);
684 return NULL;
685 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000686 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000687 /* Bad: End of Central Dir signature */
688 fclose(fp);
689 PyErr_Format(ZipImportError, "not a Zip file: "
690 "'%.200s'", archive);
691 return NULL;
692 }
693
Thomas Heller354e3d92003-07-22 18:10:15 +0000694 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000695 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000696 arc_offset = header_position - header_offset - header_size;
697 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000698
699 files = PyDict_New();
700 if (files == NULL)
701 goto error;
702
703 length = (long)strlen(path);
704 path[length] = SEP;
705
706 /* Start of Central Directory */
707 count = 0;
708 for (;;) {
709 PyObject *t;
710 int err;
711
712 fseek(fp, header_offset, 0); /* Start of file header */
713 l = PyMarshal_ReadLongFromFile(fp);
714 if (l != 0x02014B50)
715 break; /* Bad: Central Dir File Header */
716 fseek(fp, header_offset + 10, 0);
717 compress = PyMarshal_ReadShortFromFile(fp);
718 time = PyMarshal_ReadShortFromFile(fp);
719 date = PyMarshal_ReadShortFromFile(fp);
720 crc = PyMarshal_ReadLongFromFile(fp);
721 data_size = PyMarshal_ReadLongFromFile(fp);
722 file_size = PyMarshal_ReadLongFromFile(fp);
723 name_size = PyMarshal_ReadShortFromFile(fp);
724 header_size = 46 + name_size +
725 PyMarshal_ReadShortFromFile(fp) +
726 PyMarshal_ReadShortFromFile(fp);
727 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000728 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000729 if (name_size > MAXPATHLEN)
730 name_size = MAXPATHLEN;
731
732 p = name;
733 for (i = 0; i < name_size; i++) {
734 *p = (char)getc(fp);
735 if (*p == '/')
736 *p = SEP;
737 p++;
738 }
739 *p = 0; /* Add terminating null byte */
740 header_offset += header_size;
741
742 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
743
744 t = Py_BuildValue("siiiiiii", path, compress, data_size,
745 file_size, file_offset, time, date, crc);
746 if (t == NULL)
747 goto error;
748 err = PyDict_SetItemString(files, name, t);
749 Py_DECREF(t);
750 if (err != 0)
751 goto error;
752 count++;
753 }
754 fclose(fp);
755 if (Py_VerboseFlag)
756 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
757 count, archive);
758 return files;
759error:
760 fclose(fp);
761 Py_XDECREF(files);
762 return NULL;
763}
764
765/* Return the zlib.decompress function object, or NULL if zlib couldn't
766 be imported. The function is cached when found, so subsequent calls
767 don't import zlib again. Returns a *borrowed* reference.
768 XXX This makes zlib.decompress immortal. */
769static PyObject *
770get_decompress_func(void)
771{
772 static PyObject *decompress = NULL;
773
774 if (decompress == NULL) {
775 PyObject *zlib;
776 static int importing_zlib = 0;
777
778 if (importing_zlib != 0)
779 /* Someone has a zlib.py[co] in their Zip file;
780 let's avoid a stack overflow. */
781 return NULL;
782 importing_zlib = 1;
783 zlib = PyImport_ImportModule("zlib"); /* import zlib */
784 importing_zlib = 0;
785 if (zlib != NULL) {
786 decompress = PyObject_GetAttrString(zlib,
787 "decompress");
788 Py_DECREF(zlib);
789 }
790 else
791 PyErr_Clear();
792 if (Py_VerboseFlag)
793 PySys_WriteStderr("# zipimport: zlib %s\n",
794 zlib != NULL ? "available": "UNAVAILABLE");
795 }
796 return decompress;
797}
798
799/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
800 data as a new reference. */
801static PyObject *
802get_data(char *archive, PyObject *toc_entry)
803{
804 PyObject *raw_data, *data = NULL, *decompress;
805 char *buf;
806 FILE *fp;
807 int err, bytes_read = 0;
808 long l;
809 char *datapath;
810 long compress, data_size, file_size, file_offset;
811 long time, date, crc;
812
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000813 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000814 &data_size, &file_size, &file_offset, &time,
815 &date, &crc)) {
816 return NULL;
817 }
818
819 fp = fopen(archive, "rb");
820 if (!fp) {
821 PyErr_Format(PyExc_IOError,
822 "zipimport: can not open file %s", archive);
823 return NULL;
824 }
825
826 /* Check to make sure the local file header is correct */
827 fseek(fp, file_offset, 0);
828 l = PyMarshal_ReadLongFromFile(fp);
829 if (l != 0x04034B50) {
830 /* Bad: Local File Header */
831 PyErr_Format(ZipImportError,
832 "bad local file header in %s",
833 archive);
834 fclose(fp);
835 return NULL;
836 }
837 fseek(fp, file_offset + 26, 0);
838 l = 30 + PyMarshal_ReadShortFromFile(fp) +
839 PyMarshal_ReadShortFromFile(fp); /* local header size */
840 file_offset += l; /* Start of file data */
841
842 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
843 data_size : data_size + 1);
844 if (raw_data == NULL) {
845 fclose(fp);
846 return NULL;
847 }
848 buf = PyString_AsString(raw_data);
849
850 err = fseek(fp, file_offset, 0);
851 if (err == 0)
852 bytes_read = fread(buf, 1, data_size, fp);
853 fclose(fp);
854 if (err || bytes_read != data_size) {
855 PyErr_SetString(PyExc_IOError,
856 "zipimport: can't read data");
857 Py_DECREF(raw_data);
858 return NULL;
859 }
860
861 if (compress != 0) {
862 buf[data_size] = 'Z'; /* saw this in zipfile.py */
863 data_size++;
864 }
865 buf[data_size] = '\0';
866
867 if (compress == 0) /* data is not compressed */
868 return raw_data;
869
870 /* Decompress with zlib */
871 decompress = get_decompress_func();
872 if (decompress == NULL) {
873 PyErr_SetString(ZipImportError,
874 "can't decompress data; "
875 "zlib not available");
876 goto error;
877 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000878 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000879error:
880 Py_DECREF(raw_data);
881 return data;
882}
883
/* Lenient time stamp comparison: return true if t1 and t2 differ by at
   most one second.  The mtime stored in the archive has a lower
   precision than the one stored in a .pyc (dostime only stores even
   seconds), hence the tolerance. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
896
897/* Given the contents of a .py[co] file in a buffer, unmarshal the data
898 and return the code object. Return None if it the magic word doesn't
899 match (we do this instead of raising an exception as we fall back
900 to .py if available and we don't want to mask other errors).
901 Returns a new reference. */
902static PyObject *
903unmarshal_code(char *pathname, PyObject *data, time_t mtime)
904{
905 PyObject *code;
906 char *buf = PyString_AsString(data);
907 int size = PyString_Size(data);
908
909 if (size <= 9) {
910 PyErr_SetString(ZipImportError,
911 "bad pyc data");
912 return NULL;
913 }
914
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000915 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000916 if (Py_VerboseFlag)
917 PySys_WriteStderr("# %s has bad magic\n",
918 pathname);
919 Py_INCREF(Py_None);
920 return Py_None; /* signal caller to try alternative */
921 }
922
Just van Rossum9a3129c2003-01-03 11:18:56 +0000923 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
924 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000925 if (Py_VerboseFlag)
926 PySys_WriteStderr("# %s has bad mtime\n",
927 pathname);
928 Py_INCREF(Py_None);
929 return Py_None; /* signal caller to try alternative */
930 }
931
932 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
933 if (code == NULL)
934 return NULL;
935 if (!PyCode_Check(code)) {
936 Py_DECREF(code);
937 PyErr_Format(PyExc_TypeError,
938 "compiled module %.200s is not a code object",
939 pathname);
940 return NULL;
941 }
942 return code;
943}
944
945/* Replace any occurances of "\r\n?" in the input string with "\n".
946 This converts DOS and Mac line endings to Unix line endings.
947 Also append a trailing "\n" to be compatible with
948 PyParser_SimpleParseFile(). Returns a new reference. */
949static PyObject *
950normalize_line_endings(PyObject *source)
951{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000952 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000953 PyObject *fixed_source;
954
Just van Rossum9a3129c2003-01-03 11:18:56 +0000955 /* one char extra for trailing \n and one for terminating \0 */
956 buf = PyMem_Malloc(PyString_Size(source) + 2);
957 if (buf == NULL) {
958 PyErr_SetString(PyExc_MemoryError,
959 "zipimport: no memory to allocate "
960 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000961 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000962 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000964 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 if (*p == '\r') {
966 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000967 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 }
970 else
971 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 }
973 *q++ = '\n'; /* add trailing \n */
974 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000975 fixed_source = PyString_FromString(buf);
976 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000977 return fixed_source;
978}
979
980/* Given a string buffer containing Python source code, compile it
981 return and return a code object as a new reference. */
982static PyObject *
983compile_source(char *pathname, PyObject *source)
984{
985 PyObject *code, *fixed_source;
986
987 fixed_source = normalize_line_endings(source);
988 if (fixed_source == NULL)
989 return NULL;
990
991 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
992 Py_file_input);
993 Py_DECREF(fixed_source);
994 return code;
995}
996
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   Bit layout decoded here:
     dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours
     dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
              since 1980

   The broken-down time is interpreted as local time by mktime(), which
   is also left to determine whether DST is in effect. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* Don't hand mktime() indeterminate values in the fields that are
       not set below (tm_wday, tm_yday and any platform-specific
       extras). */
    memset(&stm, 0, sizeof(stm));

    stm.tm_sec   =  (dostime        & 0x1f) * 2;
    stm.tm_min   =  (dostime >> 5)  & 0x3f;
    stm.tm_hour  =  (dostime >> 11) & 0x1f;
    stm.tm_mday  =   dosdate        & 0x1f;
    stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    stm.tm_isdst =   -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1014
1015/* Given a path to a .pyc or .pyo file in the archive, return the
1016 modifictaion time of the matching .py file, or 0 if no source
1017 is available. */
1018static time_t
1019get_mtime_of_source(ZipImporter *self, char *path)
1020{
1021 PyObject *toc_entry;
1022 time_t mtime = 0;
1023 int lastchar = strlen(path) - 1;
1024 char savechar = path[lastchar];
1025 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1026 toc_entry = PyDict_GetItemString(self->files, path);
1027 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1028 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001029 /* fetch the time stamp of the .py file for comparison
1030 with an embedded pyc time stamp */
1031 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001032 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1033 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1034 mtime = parse_dostime(time, date);
1035 }
1036 path[lastchar] = savechar;
1037 return mtime;
1038}
1039
1040/* Return the code object for the module named by 'fullname' from the
1041 Zip archive as a new reference. */
1042static PyObject *
1043get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1044 time_t mtime, PyObject *toc_entry)
1045{
1046 PyObject *data, *code;
1047 char *modpath;
1048 char *archive = PyString_AsString(self->archive);
1049
1050 if (archive == NULL)
1051 return NULL;
1052
1053 data = get_data(archive, toc_entry);
1054 if (data == NULL)
1055 return NULL;
1056
1057 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1058
1059 if (isbytecode) {
1060 code = unmarshal_code(modpath, data, mtime);
1061 }
1062 else {
1063 code = compile_source(modpath, data);
1064 }
1065 Py_DECREF(data);
1066 return code;
1067}
1068
1069/* Get the code object assoiciated with the module specified by
1070 'fullname'. */
1071static PyObject *
1072get_module_code(ZipImporter *self, char *fullname,
1073 int *p_ispackage, char **p_modpath)
1074{
1075 PyObject *toc_entry;
1076 char *subname, path[MAXPATHLEN + 1];
1077 int len;
1078 struct st_zip_searchorder *zso;
1079
1080 subname = get_subname(fullname);
1081
1082 len = make_filename(PyString_AsString(self->prefix), subname, path);
1083 if (len < 0)
1084 return NULL;
1085
1086 for (zso = zip_searchorder; *zso->suffix; zso++) {
1087 PyObject *code = NULL;
1088
1089 strcpy(path + len, zso->suffix);
1090 if (Py_VerboseFlag > 1)
1091 PySys_WriteStderr("# trying %s%c%s\n",
1092 PyString_AsString(self->archive),
1093 SEP, path);
1094 toc_entry = PyDict_GetItemString(self->files, path);
1095 if (toc_entry != NULL) {
1096 time_t mtime = 0;
1097 int ispackage = zso->type & IS_PACKAGE;
1098 int isbytecode = zso->type & IS_BYTECODE;
1099
1100 if (isbytecode)
1101 mtime = get_mtime_of_source(self, path);
1102 if (p_ispackage != NULL)
1103 *p_ispackage = ispackage;
1104 code = get_code_from_data(self, ispackage,
1105 isbytecode, mtime,
1106 toc_entry);
1107 if (code == Py_None) {
1108 /* bad magic number or non-matching mtime
1109 in byte code, try next */
1110 Py_DECREF(code);
1111 continue;
1112 }
1113 if (code != NULL && p_modpath != NULL)
1114 *p_modpath = PyString_AsString(
1115 PyTuple_GetItem(toc_entry, 0));
1116 return code;
1117 }
1118 }
1119 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1120 return NULL;
1121}
1122
1123
1124/* Module init */
1125
1126PyDoc_STRVAR(zipimport_doc,
1127"zipimport provides support for importing Python modules from Zip archives.\n\
1128\n\
1129This module exports three objects:\n\
1130- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1131- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1132 subclass of ImportError, so it can be caught as ImportError, too.\n\
1133- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1134 info dicts, as used in zipimporter._files.\n\
1135\n\
1136It is usually not needed to use the zipimport module explicitly; it is\n\
1137used by the builtin import mechanism for sys.path items that are paths\n\
1138to Zip archives.");
1139
1140PyMODINIT_FUNC
1141initzipimport(void)
1142{
1143 PyObject *mod;
1144
1145 if (PyType_Ready(&ZipImporter_Type) < 0)
1146 return;
1147
1148 /* Correct directory separator */
1149 zip_searchorder[0].suffix[0] = SEP;
1150 zip_searchorder[1].suffix[0] = SEP;
1151 zip_searchorder[2].suffix[0] = SEP;
1152 if (Py_OptimizeFlag) {
1153 /* Reverse *.pyc and *.pyo */
1154 struct st_zip_searchorder tmp;
1155 tmp = zip_searchorder[0];
1156 zip_searchorder[0] = zip_searchorder[1];
1157 zip_searchorder[1] = tmp;
1158 tmp = zip_searchorder[3];
1159 zip_searchorder[3] = zip_searchorder[4];
1160 zip_searchorder[4] = tmp;
1161 }
1162
1163 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1164 NULL, PYTHON_API_VERSION);
1165
1166 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1167 PyExc_ImportError, NULL);
1168 if (ZipImportError == NULL)
1169 return;
1170
1171 Py_INCREF(ZipImportError);
1172 if (PyModule_AddObject(mod, "ZipImportError",
1173 ZipImportError) < 0)
1174 return;
1175
1176 Py_INCREF(&ZipImporter_Type);
1177 if (PyModule_AddObject(mod, "zipimporter",
1178 (PyObject *)&ZipImporter_Type) < 0)
1179 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001180
Just van Rossum52e14d62002-12-30 22:08:05 +00001181 zip_directory_cache = PyDict_New();
1182 if (zip_directory_cache == NULL)
1183 return;
1184 Py_INCREF(zip_directory_cache);
1185 if (PyModule_AddObject(mod, "_zip_directory_cache",
1186 zip_directory_cache) < 0)
1187 return;
1188}