blob: e445300cbf96989d92332577db38c4ec5f80c87d [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Flag bits describing the kind of archive entry a suffix denotes.
   IS_SOURCE is the absence of the IS_BYTECODE bit. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One row of the suffix search table: the file-name suffix to append
   and the IS_* flags that apply to an entry found with that suffix. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};
17
18/* zip_searchorder defines how we search for a module in the Zip
19 archive: we first search for a package __init__, then for
20 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
21 are swapped by initzipimport() if we run in optimized mode. Also,
22 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000023static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000024 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
26 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
27 {".pyc", IS_BYTECODE},
28 {".pyo", IS_BYTECODE},
29 {".py", IS_SOURCE},
30 {"", 0}
31};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38 PyObject_HEAD
39 PyObject *archive; /* pathname of the Zip archive */
40 PyObject *prefix; /* file prefix: "a/sub/directory/" */
41 PyObject *files; /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
Georg Brandl02c42872005-08-26 06:42:30 +000068 if (!_PyArg_NoKeywords("zipimporter()", kwds))
69 return -1;
70
Just van Rossum52e14d62002-12-30 22:08:05 +000071 if (!PyArg_ParseTuple(args, "s:zipimporter",
72 &path))
73 return -1;
74
75 len = strlen(path);
76 if (len == 0) {
77 PyErr_SetString(ZipImportError, "archive path is empty");
78 return -1;
79 }
80 if (len >= MAXPATHLEN) {
81 PyErr_SetString(ZipImportError,
82 "archive path too long");
83 return -1;
84 }
85 strcpy(buf, path);
86
87#ifdef ALTSEP
88 for (p = buf; *p; p++) {
89 if (*p == ALTSEP)
90 *p = SEP;
91 }
92#endif
93
94 path = NULL;
95 prefix = NULL;
96 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000097#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000098 struct stat statbuf;
99 int rv;
100
101 rv = stat(buf, &statbuf);
102 if (rv == 0) {
103 /* it exists */
104 if (S_ISREG(statbuf.st_mode))
105 /* it's a file */
106 path = buf;
107 break;
108 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000109#else
110 if (object_exists(buf)) {
111 /* it exists */
112 if (isfile(buf))
113 /* it's a file */
114 path = buf;
115 break;
116 }
117#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000118 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000119 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000120 if (prefix != NULL)
121 *prefix = SEP;
122 if (p == NULL)
123 break;
124 *p = '\0';
125 prefix = p;
126 }
127 if (path != NULL) {
128 PyObject *files;
129 files = PyDict_GetItemString(zip_directory_cache, path);
130 if (files == NULL) {
131 files = read_directory(buf);
132 if (files == NULL)
133 return -1;
134 if (PyDict_SetItemString(zip_directory_cache, path,
135 files) != 0)
136 return -1;
137 }
138 else
139 Py_INCREF(files);
140 self->files = files;
141 }
142 else {
143 PyErr_SetString(ZipImportError, "not a Zip file");
144 return -1;
145 }
146
147 if (prefix == NULL)
148 prefix = "";
149 else {
150 prefix++;
151 len = strlen(prefix);
152 if (prefix[len-1] != SEP) {
153 /* add trailing SEP */
154 prefix[len] = SEP;
155 prefix[len + 1] = '\0';
156 }
157 }
158
159 self->archive = PyString_FromString(buf);
160 if (self->archive == NULL)
161 return -1;
162
163 self->prefix = PyString_FromString(prefix);
164 if (self->prefix == NULL)
165 return -1;
166
167 return 0;
168}
169
170/* GC support. */
171static int
172zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
173{
174 ZipImporter *self = (ZipImporter *)obj;
175 int err;
176
177 if (self->files != NULL) {
178 err = visit(self->files, arg);
179 if (err)
180 return err;
181 }
182 return 0;
183}
184
185static void
186zipimporter_dealloc(ZipImporter *self)
187{
188 PyObject_GC_UnTrack(self);
189 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000190 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000191 Py_XDECREF(self->files);
192 self->ob_type->tp_free((PyObject *)self);
193}
194
195static PyObject *
196zipimporter_repr(ZipImporter *self)
197{
198 char buf[500];
199 char *archive = "???";
200 char *prefix = "";
201
202 if (self->archive != NULL && PyString_Check(self->archive))
203 archive = PyString_AsString(self->archive);
204 if (self->prefix != NULL && PyString_Check(self->prefix))
205 prefix = PyString_AsString(self->prefix);
206 if (prefix != NULL && *prefix)
207 PyOS_snprintf(buf, sizeof(buf),
208 "<zipimporter object \"%.300s%c%.150s\">",
209 archive, SEP, prefix);
210 else
211 PyOS_snprintf(buf, sizeof(buf),
212 "<zipimporter object \"%.300s\">",
213 archive);
214 return PyString_FromString(buf);
215}
216
/* Return the part of 'fullname' after its last '.', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'; when
   there is no dot, 'fullname' itself is returned. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
228
229/* Given a (sub)modulename, write the potential file path in the
230 archive (without extension) to the path buffer. Return the
231 length of the resulting string. */
232static int
233make_filename(char *prefix, char *name, char *path)
234{
235 int len;
236 char *p;
237
238 len = strlen(prefix);
239
240 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
241 if (len + strlen(name) + 13 >= MAXPATHLEN) {
242 PyErr_SetString(ZipImportError, "path too long");
243 return -1;
244 }
245
246 strcpy(path, prefix);
247 strcpy(path + len, name);
248 for (p = path + len; *p; p++) {
249 if (*p == '.')
250 *p = SEP;
251 }
252 len += strlen(name);
253 return len;
254}
255
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed, an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package */
};
262
263/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000264static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000265get_module_info(ZipImporter *self, char *fullname)
266{
267 char *subname, path[MAXPATHLEN + 1];
268 int len;
269 struct st_zip_searchorder *zso;
270
271 subname = get_subname(fullname);
272
273 len = make_filename(PyString_AsString(self->prefix), subname, path);
274 if (len < 0)
275 return MI_ERROR;
276
277 for (zso = zip_searchorder; *zso->suffix; zso++) {
278 strcpy(path + len, zso->suffix);
279 if (PyDict_GetItemString(self->files, path) != NULL) {
280 if (zso->type & IS_PACKAGE)
281 return MI_PACKAGE;
282 else
283 return MI_MODULE;
284 }
285 }
286 return MI_NOT_FOUND;
287}
288
289/* Check whether we can satisfy the import of the module named by
290 'fullname'. Return self if we can, None if we can't. */
291static PyObject *
292zipimporter_find_module(PyObject *obj, PyObject *args)
293{
294 ZipImporter *self = (ZipImporter *)obj;
295 PyObject *path = NULL;
296 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000297 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000298
299 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
300 &fullname, &path))
301 return NULL;
302
303 mi = get_module_info(self, fullname);
304 if (mi == MI_ERROR)
305 return NULL;
306 if (mi == MI_NOT_FOUND) {
307 Py_INCREF(Py_None);
308 return Py_None;
309 }
310 Py_INCREF(self);
311 return (PyObject *)self;
312}
313
314/* Load and return the module named by 'fullname'. */
315static PyObject *
316zipimporter_load_module(PyObject *obj, PyObject *args)
317{
318 ZipImporter *self = (ZipImporter *)obj;
319 PyObject *code, *mod, *dict;
320 char *fullname, *modpath;
321 int ispackage;
322
323 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
324 &fullname))
325 return NULL;
326
327 code = get_module_code(self, fullname, &ispackage, &modpath);
328 if (code == NULL)
329 return NULL;
330
331 mod = PyImport_AddModule(fullname);
332 if (mod == NULL) {
333 Py_DECREF(code);
334 return NULL;
335 }
336 dict = PyModule_GetDict(mod);
337
338 /* mod.__loader__ = self */
339 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
340 goto error;
341
342 if (ispackage) {
343 /* add __path__ to the module *before* the code gets
344 executed */
345 PyObject *pkgpath, *fullpath;
346 char *prefix = PyString_AsString(self->prefix);
347 char *subname = get_subname(fullname);
348 int err;
349
350 fullpath = PyString_FromFormat("%s%c%s%s",
351 PyString_AsString(self->archive),
352 SEP,
353 *prefix ? prefix : "",
354 subname);
355 if (fullpath == NULL)
356 goto error;
357
358 pkgpath = Py_BuildValue("[O]", fullpath);
359 Py_DECREF(fullpath);
360 if (pkgpath == NULL)
361 goto error;
362 err = PyDict_SetItemString(dict, "__path__", pkgpath);
363 Py_DECREF(pkgpath);
364 if (err != 0)
365 goto error;
366 }
367 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
368 Py_DECREF(code);
369 if (Py_VerboseFlag)
370 PySys_WriteStderr("import %s # loaded from Zip %s\n",
371 fullname, modpath);
372 return mod;
373error:
374 Py_DECREF(code);
375 Py_DECREF(mod);
376 return NULL;
377}
378
379/* Return a bool signifying whether the module is a package or not. */
380static PyObject *
381zipimporter_is_package(PyObject *obj, PyObject *args)
382{
383 ZipImporter *self = (ZipImporter *)obj;
384 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000385 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000387 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000388 &fullname))
389 return NULL;
390
391 mi = get_module_info(self, fullname);
392 if (mi == MI_ERROR)
393 return NULL;
394 if (mi == MI_NOT_FOUND) {
395 PyErr_Format(ZipImportError, "can't find module '%.200s'",
396 fullname);
397 return NULL;
398 }
399 return PyBool_FromLong(mi == MI_PACKAGE);
400}
401
402static PyObject *
403zipimporter_get_data(PyObject *obj, PyObject *args)
404{
405 ZipImporter *self = (ZipImporter *)obj;
406 char *path;
407#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000408 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000409#endif
410 PyObject *toc_entry;
411 int len;
412
413 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
414 return NULL;
415
416#ifdef ALTSEP
417 if (strlen(path) >= MAXPATHLEN) {
418 PyErr_SetString(ZipImportError, "path too long");
419 return NULL;
420 }
421 strcpy(buf, path);
422 for (p = buf; *p; p++) {
423 if (*p == ALTSEP)
424 *p = SEP;
425 }
426 path = buf;
427#endif
428 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000429 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000430 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
431 path[len] == SEP) {
432 path = path + len + 1;
433 }
434
435 toc_entry = PyDict_GetItemString(self->files, path);
436 if (toc_entry == NULL) {
437 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
438 path);
439 return NULL;
440 }
441 return get_data(PyString_AsString(self->archive), toc_entry);
442}
443
444static PyObject *
445zipimporter_get_code(PyObject *obj, PyObject *args)
446{
447 ZipImporter *self = (ZipImporter *)obj;
448 char *fullname;
449
450 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
451 return NULL;
452
453 return get_module_code(self, fullname, NULL, NULL);
454}
455
456static PyObject *
457zipimporter_get_source(PyObject *obj, PyObject *args)
458{
459 ZipImporter *self = (ZipImporter *)obj;
460 PyObject *toc_entry;
461 char *fullname, *subname, path[MAXPATHLEN+1];
462 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000463 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000464
465 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
466 return NULL;
467
468 mi = get_module_info(self, fullname);
469 if (mi == MI_ERROR)
470 return NULL;
471 if (mi == MI_NOT_FOUND) {
472 PyErr_Format(ZipImportError, "can't find module '%.200s'",
473 fullname);
474 return NULL;
475 }
476 subname = get_subname(fullname);
477
478 len = make_filename(PyString_AsString(self->prefix), subname, path);
479 if (len < 0)
480 return NULL;
481
482 if (mi == MI_PACKAGE) {
483 path[len] = SEP;
484 strcpy(path + len + 1, "__init__.py");
485 }
486 else
487 strcpy(path + len, ".py");
488
489 toc_entry = PyDict_GetItemString(self->files, path);
490 if (toc_entry != NULL)
491 return get_data(PyString_AsString(self->archive), toc_entry);
492
493 /* we have the module, but no source */
494 Py_INCREF(Py_None);
495 return Py_None;
496}
497
498PyDoc_STRVAR(doc_find_module,
499"find_module(fullname, path=None) -> self or None.\n\
500\n\
501Search for a module specified by 'fullname'. 'fullname' must be the\n\
502fully qualified (dotted) module name. It returns the zipimporter\n\
503instance itself if the module was found, or None if it wasn't.\n\
504The optional 'path' argument is ignored -- it's there for compatibility\n\
505with the importer protocol.");
506
507PyDoc_STRVAR(doc_load_module,
508"load_module(fullname) -> module.\n\
509\n\
510Load the module specified by 'fullname'. 'fullname' must be the\n\
511fully qualified (dotted) module name. It returns the imported\n\
512module, or raises ZipImportError if it wasn't found.");
513
514PyDoc_STRVAR(doc_get_data,
515"get_data(pathname) -> string with file data.\n\
516\n\
517Return the data associated with 'pathname'. Raise IOError if\n\
518the file wasn't found.");
519
520PyDoc_STRVAR(doc_is_package,
521"is_package(fullname) -> bool.\n\
522\n\
523Return True if the module specified by fullname is a package.\n\
524Raise ZipImportError is the module couldn't be found.");
525
526PyDoc_STRVAR(doc_get_code,
527"get_code(fullname) -> code object.\n\
528\n\
529Return the code object for the specified module. Raise ZipImportError\n\
530is the module couldn't be found.");
531
532PyDoc_STRVAR(doc_get_source,
533"get_source(fullname) -> source string.\n\
534\n\
535Return the source code for the specified module. Raise ZipImportError\n\
536is the module couldn't be found, return None if the archive does\n\
537contain the module, but has no source for it.");
538
539static PyMethodDef zipimporter_methods[] = {
540 {"find_module", zipimporter_find_module, METH_VARARGS,
541 doc_find_module},
542 {"load_module", zipimporter_load_module, METH_VARARGS,
543 doc_load_module},
544 {"get_data", zipimporter_get_data, METH_VARARGS,
545 doc_get_data},
546 {"get_code", zipimporter_get_code, METH_VARARGS,
547 doc_get_code},
548 {"get_source", zipimporter_get_source, METH_VARARGS,
549 doc_get_source},
550 {"is_package", zipimporter_is_package, METH_VARARGS,
551 doc_is_package},
552 {NULL, NULL} /* sentinel */
553};
554
555static PyMemberDef zipimporter_members[] = {
556 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
557 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
558 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
559 {NULL}
560};
561
562PyDoc_STRVAR(zipimporter_doc,
563"zipimporter(archivepath) -> zipimporter object\n\
564\n\
565Create a new zipimporter instance. 'archivepath' must be a path to\n\
566a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
567a valid Zip archive.");
568
569#define DEFERRED_ADDRESS(ADDR) 0
570
571static PyTypeObject ZipImporter_Type = {
572 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
573 0,
574 "zipimport.zipimporter",
575 sizeof(ZipImporter),
576 0, /* tp_itemsize */
577 (destructor)zipimporter_dealloc, /* tp_dealloc */
578 0, /* tp_print */
579 0, /* tp_getattr */
580 0, /* tp_setattr */
581 0, /* tp_compare */
582 (reprfunc)zipimporter_repr, /* tp_repr */
583 0, /* tp_as_number */
584 0, /* tp_as_sequence */
585 0, /* tp_as_mapping */
586 0, /* tp_hash */
587 0, /* tp_call */
588 0, /* tp_str */
589 PyObject_GenericGetAttr, /* tp_getattro */
590 0, /* tp_setattro */
591 0, /* tp_as_buffer */
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
593 Py_TPFLAGS_HAVE_GC, /* tp_flags */
594 zipimporter_doc, /* tp_doc */
595 zipimporter_traverse, /* tp_traverse */
596 0, /* tp_clear */
597 0, /* tp_richcompare */
598 0, /* tp_weaklistoffset */
599 0, /* tp_iter */
600 0, /* tp_iternext */
601 zipimporter_methods, /* tp_methods */
602 zipimporter_members, /* tp_members */
603 0, /* tp_getset */
604 0, /* tp_base */
605 0, /* tp_dict */
606 0, /* tp_descr_get */
607 0, /* tp_descr_set */
608 0, /* tp_dictoffset */
609 (initproc)zipimporter_init, /* tp_init */
610 PyType_GenericAlloc, /* tp_alloc */
611 PyType_GenericNew, /* tp_new */
612 PyObject_GC_Del, /* tp_free */
613};
614
615
616/* implementation */
617
/* Given a buffer, return the long that is represented by its first
   4 bytes, read as a little-endian signed 32-bit value.  This
   partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no signed value
   is ever shifted into its sign bit, and the sign extension is done
   arithmetically, so the result does not depend on the width of
   'long'. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* negative: value is bits - 2**32, computed without any
           intermediate result leaving the range of a 32-bit long */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
634
635/*
636 read_directory(archive) -> files dict (new reference)
637
638 Given a path to a Zip archive, build a dict, mapping file names
639 (local to the archive, using SEP as a separator) to toc entries.
640
641 A toc_entry is a tuple:
642
643 (compress, # compression kind; 0 for uncompressed
644 data_size, # size of compressed data on disk
645 file_size, # size of decompressed data
646 file_offset, # offset of file header from start of archive
647 time, # mod time of file (in dos format)
648 date, # mod data of file (in dos format)
649 crc, # crc checksum of the data
650 )
651
652 Directories can be recognized by the trailing SEP in the name,
653 data_size and file_offset are 0.
654*/
655static PyObject *
656read_directory(char *archive)
657{
658 PyObject *files = NULL;
659 FILE *fp;
660 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000661 long header_offset, name_size, header_size, header_position;
Just van Rossum52e14d62002-12-30 22:08:05 +0000662 long i, l, length, count;
663 char path[MAXPATHLEN + 5];
664 char name[MAXPATHLEN + 5];
665 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000666 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000667
668 if (strlen(archive) > MAXPATHLEN) {
669 PyErr_SetString(PyExc_OverflowError,
670 "Zip path name is too long");
671 return NULL;
672 }
673 strcpy(path, archive);
674
675 fp = fopen(archive, "rb");
676 if (fp == NULL) {
677 PyErr_Format(ZipImportError, "can't open Zip file: "
678 "'%.200s'", archive);
679 return NULL;
680 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000681 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000682 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000683 if (fread(endof_central_dir, 1, 22, fp) != 22) {
684 fclose(fp);
685 PyErr_Format(ZipImportError, "can't read Zip file: "
686 "'%.200s'", archive);
687 return NULL;
688 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000689 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000690 /* Bad: End of Central Dir signature */
691 fclose(fp);
692 PyErr_Format(ZipImportError, "not a Zip file: "
693 "'%.200s'", archive);
694 return NULL;
695 }
696
Thomas Heller354e3d92003-07-22 18:10:15 +0000697 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000698 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000699 arc_offset = header_position - header_offset - header_size;
700 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000701
702 files = PyDict_New();
703 if (files == NULL)
704 goto error;
705
706 length = (long)strlen(path);
707 path[length] = SEP;
708
709 /* Start of Central Directory */
710 count = 0;
711 for (;;) {
712 PyObject *t;
713 int err;
714
715 fseek(fp, header_offset, 0); /* Start of file header */
716 l = PyMarshal_ReadLongFromFile(fp);
717 if (l != 0x02014B50)
718 break; /* Bad: Central Dir File Header */
719 fseek(fp, header_offset + 10, 0);
720 compress = PyMarshal_ReadShortFromFile(fp);
721 time = PyMarshal_ReadShortFromFile(fp);
722 date = PyMarshal_ReadShortFromFile(fp);
723 crc = PyMarshal_ReadLongFromFile(fp);
724 data_size = PyMarshal_ReadLongFromFile(fp);
725 file_size = PyMarshal_ReadLongFromFile(fp);
726 name_size = PyMarshal_ReadShortFromFile(fp);
727 header_size = 46 + name_size +
728 PyMarshal_ReadShortFromFile(fp) +
729 PyMarshal_ReadShortFromFile(fp);
730 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000731 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000732 if (name_size > MAXPATHLEN)
733 name_size = MAXPATHLEN;
734
735 p = name;
736 for (i = 0; i < name_size; i++) {
737 *p = (char)getc(fp);
738 if (*p == '/')
739 *p = SEP;
740 p++;
741 }
742 *p = 0; /* Add terminating null byte */
743 header_offset += header_size;
744
745 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
746
747 t = Py_BuildValue("siiiiiii", path, compress, data_size,
748 file_size, file_offset, time, date, crc);
749 if (t == NULL)
750 goto error;
751 err = PyDict_SetItemString(files, name, t);
752 Py_DECREF(t);
753 if (err != 0)
754 goto error;
755 count++;
756 }
757 fclose(fp);
758 if (Py_VerboseFlag)
759 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
760 count, archive);
761 return files;
762error:
763 fclose(fp);
764 Py_XDECREF(files);
765 return NULL;
766}
767
768/* Return the zlib.decompress function object, or NULL if zlib couldn't
769 be imported. The function is cached when found, so subsequent calls
770 don't import zlib again. Returns a *borrowed* reference.
771 XXX This makes zlib.decompress immortal. */
772static PyObject *
773get_decompress_func(void)
774{
775 static PyObject *decompress = NULL;
776
777 if (decompress == NULL) {
778 PyObject *zlib;
779 static int importing_zlib = 0;
780
781 if (importing_zlib != 0)
782 /* Someone has a zlib.py[co] in their Zip file;
783 let's avoid a stack overflow. */
784 return NULL;
785 importing_zlib = 1;
786 zlib = PyImport_ImportModule("zlib"); /* import zlib */
787 importing_zlib = 0;
788 if (zlib != NULL) {
789 decompress = PyObject_GetAttrString(zlib,
790 "decompress");
791 Py_DECREF(zlib);
792 }
793 else
794 PyErr_Clear();
795 if (Py_VerboseFlag)
796 PySys_WriteStderr("# zipimport: zlib %s\n",
797 zlib != NULL ? "available": "UNAVAILABLE");
798 }
799 return decompress;
800}
801
802/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
803 data as a new reference. */
804static PyObject *
805get_data(char *archive, PyObject *toc_entry)
806{
807 PyObject *raw_data, *data = NULL, *decompress;
808 char *buf;
809 FILE *fp;
810 int err, bytes_read = 0;
811 long l;
812 char *datapath;
813 long compress, data_size, file_size, file_offset;
814 long time, date, crc;
815
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000816 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000817 &data_size, &file_size, &file_offset, &time,
818 &date, &crc)) {
819 return NULL;
820 }
821
822 fp = fopen(archive, "rb");
823 if (!fp) {
824 PyErr_Format(PyExc_IOError,
825 "zipimport: can not open file %s", archive);
826 return NULL;
827 }
828
829 /* Check to make sure the local file header is correct */
830 fseek(fp, file_offset, 0);
831 l = PyMarshal_ReadLongFromFile(fp);
832 if (l != 0x04034B50) {
833 /* Bad: Local File Header */
834 PyErr_Format(ZipImportError,
835 "bad local file header in %s",
836 archive);
837 fclose(fp);
838 return NULL;
839 }
840 fseek(fp, file_offset + 26, 0);
841 l = 30 + PyMarshal_ReadShortFromFile(fp) +
842 PyMarshal_ReadShortFromFile(fp); /* local header size */
843 file_offset += l; /* Start of file data */
844
845 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
846 data_size : data_size + 1);
847 if (raw_data == NULL) {
848 fclose(fp);
849 return NULL;
850 }
851 buf = PyString_AsString(raw_data);
852
853 err = fseek(fp, file_offset, 0);
854 if (err == 0)
855 bytes_read = fread(buf, 1, data_size, fp);
856 fclose(fp);
857 if (err || bytes_read != data_size) {
858 PyErr_SetString(PyExc_IOError,
859 "zipimport: can't read data");
860 Py_DECREF(raw_data);
861 return NULL;
862 }
863
864 if (compress != 0) {
865 buf[data_size] = 'Z'; /* saw this in zipfile.py */
866 data_size++;
867 }
868 buf[data_size] = '\0';
869
870 if (compress == 0) /* data is not compressed */
871 return raw_data;
872
873 /* Decompress with zlib */
874 decompress = get_decompress_func();
875 if (decompress == NULL) {
876 PyErr_SetString(ZipImportError,
877 "can't decompress data; "
878 "zlib not available");
879 goto error;
880 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000881 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000882error:
883 Py_DECREF(raw_data);
884 return data;
885}
886
/* Lenient date/time comparison function.  The mtime stored in the
   archive has a lower precision than the mtime stored in a .pyc
   (dostime only stores even seconds), so two time stamps count as
   equal when they differ by at most one second. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
899
900/* Given the contents of a .py[co] file in a buffer, unmarshal the data
901 and return the code object. Return None if it the magic word doesn't
902 match (we do this instead of raising an exception as we fall back
903 to .py if available and we don't want to mask other errors).
904 Returns a new reference. */
905static PyObject *
906unmarshal_code(char *pathname, PyObject *data, time_t mtime)
907{
908 PyObject *code;
909 char *buf = PyString_AsString(data);
910 int size = PyString_Size(data);
911
912 if (size <= 9) {
913 PyErr_SetString(ZipImportError,
914 "bad pyc data");
915 return NULL;
916 }
917
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000918 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000919 if (Py_VerboseFlag)
920 PySys_WriteStderr("# %s has bad magic\n",
921 pathname);
922 Py_INCREF(Py_None);
923 return Py_None; /* signal caller to try alternative */
924 }
925
Just van Rossum9a3129c2003-01-03 11:18:56 +0000926 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
927 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000928 if (Py_VerboseFlag)
929 PySys_WriteStderr("# %s has bad mtime\n",
930 pathname);
931 Py_INCREF(Py_None);
932 return Py_None; /* signal caller to try alternative */
933 }
934
935 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
936 if (code == NULL)
937 return NULL;
938 if (!PyCode_Check(code)) {
939 Py_DECREF(code);
940 PyErr_Format(PyExc_TypeError,
941 "compiled module %.200s is not a code object",
942 pathname);
943 return NULL;
944 }
945 return code;
946}
947
948/* Replace any occurances of "\r\n?" in the input string with "\n".
949 This converts DOS and Mac line endings to Unix line endings.
950 Also append a trailing "\n" to be compatible with
951 PyParser_SimpleParseFile(). Returns a new reference. */
952static PyObject *
953normalize_line_endings(PyObject *source)
954{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000955 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000956 PyObject *fixed_source;
957
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 /* one char extra for trailing \n and one for terminating \0 */
959 buf = PyMem_Malloc(PyString_Size(source) + 2);
960 if (buf == NULL) {
961 PyErr_SetString(PyExc_MemoryError,
962 "zipimport: no memory to allocate "
963 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000965 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000966 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000967 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 if (*p == '\r') {
969 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000970 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000971 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 }
973 else
974 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000975 }
976 *q++ = '\n'; /* add trailing \n */
977 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000978 fixed_source = PyString_FromString(buf);
979 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000980 return fixed_source;
981}
982
983/* Given a string buffer containing Python source code, compile it
984 return and return a code object as a new reference. */
985static PyObject *
986compile_source(char *pathname, PyObject *source)
987{
988 PyObject *code, *fixed_source;
989
990 fixed_source = normalize_line_endings(source);
991 if (fixed_source == NULL)
992 return NULL;
993
994 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
995 Py_file_input);
996 Py_DECREF(fixed_source);
997 return code;
998}
999
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   dosdate: bits 0-4 day, bits 5-8 month (1-based), bits 9-15 years
   since 1980.  The broken-down time is handed to mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min = (dostime >> 5) & 0x3f;
    fields.tm_sec = (dostime & 0x1f) * 2;

    fields.tm_isdst = -1;  /* wday/yday is ignored */

    return mktime(&fields);
}
1017
1018/* Given a path to a .pyc or .pyo file in the archive, return the
1019 modifictaion time of the matching .py file, or 0 if no source
1020 is available. */
1021static time_t
1022get_mtime_of_source(ZipImporter *self, char *path)
1023{
1024 PyObject *toc_entry;
1025 time_t mtime = 0;
1026 int lastchar = strlen(path) - 1;
1027 char savechar = path[lastchar];
1028 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1029 toc_entry = PyDict_GetItemString(self->files, path);
1030 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1031 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001032 /* fetch the time stamp of the .py file for comparison
1033 with an embedded pyc time stamp */
1034 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001035 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1036 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1037 mtime = parse_dostime(time, date);
1038 }
1039 path[lastchar] = savechar;
1040 return mtime;
1041}
1042
1043/* Return the code object for the module named by 'fullname' from the
1044 Zip archive as a new reference. */
1045static PyObject *
1046get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1047 time_t mtime, PyObject *toc_entry)
1048{
1049 PyObject *data, *code;
1050 char *modpath;
1051 char *archive = PyString_AsString(self->archive);
1052
1053 if (archive == NULL)
1054 return NULL;
1055
1056 data = get_data(archive, toc_entry);
1057 if (data == NULL)
1058 return NULL;
1059
1060 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1061
1062 if (isbytecode) {
1063 code = unmarshal_code(modpath, data, mtime);
1064 }
1065 else {
1066 code = compile_source(modpath, data);
1067 }
1068 Py_DECREF(data);
1069 return code;
1070}
1071
1072/* Get the code object assoiciated with the module specified by
1073 'fullname'. */
1074static PyObject *
1075get_module_code(ZipImporter *self, char *fullname,
1076 int *p_ispackage, char **p_modpath)
1077{
1078 PyObject *toc_entry;
1079 char *subname, path[MAXPATHLEN + 1];
1080 int len;
1081 struct st_zip_searchorder *zso;
1082
1083 subname = get_subname(fullname);
1084
1085 len = make_filename(PyString_AsString(self->prefix), subname, path);
1086 if (len < 0)
1087 return NULL;
1088
1089 for (zso = zip_searchorder; *zso->suffix; zso++) {
1090 PyObject *code = NULL;
1091
1092 strcpy(path + len, zso->suffix);
1093 if (Py_VerboseFlag > 1)
1094 PySys_WriteStderr("# trying %s%c%s\n",
1095 PyString_AsString(self->archive),
1096 SEP, path);
1097 toc_entry = PyDict_GetItemString(self->files, path);
1098 if (toc_entry != NULL) {
1099 time_t mtime = 0;
1100 int ispackage = zso->type & IS_PACKAGE;
1101 int isbytecode = zso->type & IS_BYTECODE;
1102
1103 if (isbytecode)
1104 mtime = get_mtime_of_source(self, path);
1105 if (p_ispackage != NULL)
1106 *p_ispackage = ispackage;
1107 code = get_code_from_data(self, ispackage,
1108 isbytecode, mtime,
1109 toc_entry);
1110 if (code == Py_None) {
1111 /* bad magic number or non-matching mtime
1112 in byte code, try next */
1113 Py_DECREF(code);
1114 continue;
1115 }
1116 if (code != NULL && p_modpath != NULL)
1117 *p_modpath = PyString_AsString(
1118 PyTuple_GetItem(toc_entry, 0));
1119 return code;
1120 }
1121 }
1122 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1123 return NULL;
1124}
1125
1126
1127/* Module init */
1128
1129PyDoc_STRVAR(zipimport_doc,
1130"zipimport provides support for importing Python modules from Zip archives.\n\
1131\n\
1132This module exports three objects:\n\
1133- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1134- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1135 subclass of ImportError, so it can be caught as ImportError, too.\n\
1136- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1137 info dicts, as used in zipimporter._files.\n\
1138\n\
1139It is usually not needed to use the zipimport module explicitly; it is\n\
1140used by the builtin import mechanism for sys.path items that are paths\n\
1141to Zip archives.");
1142
1143PyMODINIT_FUNC
1144initzipimport(void)
1145{
1146 PyObject *mod;
1147
1148 if (PyType_Ready(&ZipImporter_Type) < 0)
1149 return;
1150
1151 /* Correct directory separator */
1152 zip_searchorder[0].suffix[0] = SEP;
1153 zip_searchorder[1].suffix[0] = SEP;
1154 zip_searchorder[2].suffix[0] = SEP;
1155 if (Py_OptimizeFlag) {
1156 /* Reverse *.pyc and *.pyo */
1157 struct st_zip_searchorder tmp;
1158 tmp = zip_searchorder[0];
1159 zip_searchorder[0] = zip_searchorder[1];
1160 zip_searchorder[1] = tmp;
1161 tmp = zip_searchorder[3];
1162 zip_searchorder[3] = zip_searchorder[4];
1163 zip_searchorder[4] = tmp;
1164 }
1165
1166 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1167 NULL, PYTHON_API_VERSION);
1168
1169 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1170 PyExc_ImportError, NULL);
1171 if (ZipImportError == NULL)
1172 return;
1173
1174 Py_INCREF(ZipImportError);
1175 if (PyModule_AddObject(mod, "ZipImportError",
1176 ZipImportError) < 0)
1177 return;
1178
1179 Py_INCREF(&ZipImporter_Type);
1180 if (PyModule_AddObject(mod, "zipimporter",
1181 (PyObject *)&ZipImporter_Type) < 0)
1182 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001183
Just van Rossum52e14d62002-12-30 22:08:05 +00001184 zip_directory_cache = PyDict_New();
1185 if (zip_directory_cache == NULL)
1186 return;
1187 Py_INCREF(zip_directory_cache);
1188 if (PyModule_AddObject(mod, "_zip_directory_cache",
1189 zip_directory_cache) < 0)
1190 return;
1191}