blob: 637dc48aa5dc9eb524e9fc8a3dc12c9682d6dbdd [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits describing the kind of archive member a suffix stands for. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2

/* One step of the module lookup: a filename suffix to append to the
   module path, plus the IS_* flags that apply when it matches. */
struct st_zip_searchorder {
    char suffix[14];
    int type;
};

/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.  The entry with an empty suffix
   terminates the table. */
static struct st_zip_searchorder zip_searchorder[] = {
    /* package forms */
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py",  IS_PACKAGE | IS_SOURCE},
    /* plain module forms */
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py",  IS_SOURCE},
    /* sentinel */
    {"", 0}
};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
43static PyTypeObject ZipImporter_Type;
44static PyObject *ZipImportError;
45static PyObject *zip_directory_cache = NULL;
46
47/* forward decls */
48static PyObject *read_directory(char *archive);
49static PyObject *get_data(char *archive, PyObject *toc_entry);
50static PyObject *get_module_code(ZipImporter *self, char *fullname,
51 int *p_ispackage, char **p_modpath);
52
53
54#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
55
56
57/* zipimporter.__init__
58 Split the "subdirectory" from the Zip archive path, lookup a matching
59 entry in sys.path_importer_cache, fetch the file directory from there
60 if found, or else read it from the archive. */
61static int
62zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63{
64 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000065 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000066
Georg Brandl02c42872005-08-26 06:42:30 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
69
Just van Rossum52e14d62002-12-30 22:08:05 +000070 if (!PyArg_ParseTuple(args, "s:zipimporter",
71 &path))
72 return -1;
73
74 len = strlen(path);
75 if (len == 0) {
76 PyErr_SetString(ZipImportError, "archive path is empty");
77 return -1;
78 }
79 if (len >= MAXPATHLEN) {
80 PyErr_SetString(ZipImportError,
81 "archive path too long");
82 return -1;
83 }
84 strcpy(buf, path);
85
86#ifdef ALTSEP
87 for (p = buf; *p; p++) {
88 if (*p == ALTSEP)
89 *p = SEP;
90 }
91#endif
92
93 path = NULL;
94 prefix = NULL;
95 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000096#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000097 struct stat statbuf;
98 int rv;
99
100 rv = stat(buf, &statbuf);
101 if (rv == 0) {
102 /* it exists */
103 if (S_ISREG(statbuf.st_mode))
104 /* it's a file */
105 path = buf;
106 break;
107 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000108#else
109 if (object_exists(buf)) {
110 /* it exists */
111 if (isfile(buf))
112 /* it's a file */
113 path = buf;
114 break;
115 }
116#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000118 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
124 prefix = p;
125 }
126 if (path != NULL) {
127 PyObject *files;
128 files = PyDict_GetItemString(zip_directory_cache, path);
129 if (files == NULL) {
130 files = read_directory(buf);
131 if (files == NULL)
132 return -1;
133 if (PyDict_SetItemString(zip_directory_cache, path,
134 files) != 0)
135 return -1;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140 }
141 else {
142 PyErr_SetString(ZipImportError, "not a Zip file");
143 return -1;
144 }
145
146 if (prefix == NULL)
147 prefix = "";
148 else {
149 prefix++;
150 len = strlen(prefix);
151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
155 }
156 }
157
158 self->archive = PyString_FromString(buf);
159 if (self->archive == NULL)
160 return -1;
161
162 self->prefix = PyString_FromString(prefix);
163 if (self->prefix == NULL)
164 return -1;
165
166 return 0;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173 ZipImporter *self = (ZipImporter *)obj;
174 int err;
175
176 if (self->files != NULL) {
177 err = visit(self->files, arg);
178 if (err)
179 return err;
180 }
181 return 0;
182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000189 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 Py_XDECREF(self->files);
191 self->ob_type->tp_free((PyObject *)self);
192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
197 char buf[500];
198 char *archive = "???";
199 char *prefix = "";
200
201 if (self->archive != NULL && PyString_Check(self->archive))
202 archive = PyString_AsString(self->archive);
203 if (self->prefix != NULL && PyString_Check(self->prefix))
204 prefix = PyString_AsString(self->prefix);
205 if (prefix != NULL && *prefix)
206 PyOS_snprintf(buf, sizeof(buf),
207 "<zipimporter object \"%.300s%c%.150s\">",
208 archive, SEP, prefix);
209 else
210 PyOS_snprintf(buf, sizeof(buf),
211 "<zipimporter object \"%.300s\">",
212 archive);
213 return PyString_FromString(buf);
214}
215
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   when there is no '.', 'fullname' is returned unchanged. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
227
228/* Given a (sub)modulename, write the potential file path in the
229 archive (without extension) to the path buffer. Return the
230 length of the resulting string. */
231static int
232make_filename(char *prefix, char *name, char *path)
233{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000234 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000235 char *p;
236
237 len = strlen(prefix);
238
239 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
240 if (len + strlen(name) + 13 >= MAXPATHLEN) {
241 PyErr_SetString(ZipImportError, "path too long");
242 return -1;
243 }
244
245 strcpy(path, prefix);
246 strcpy(path + len, name);
247 for (p = path + len; *p; p++) {
248 if (*p == '.')
249 *p = SEP;
250 }
251 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000252 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000253 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000254}
255
/* Result of looking a module up in the archive's file table. */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,       /* no matching entry in the archive */
    MI_MODULE,          /* found as a plain module */
    MI_PACKAGE          /* found as a package (__init__ entry) */
};
262
263/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000264static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000265get_module_info(ZipImporter *self, char *fullname)
266{
267 char *subname, path[MAXPATHLEN + 1];
268 int len;
269 struct st_zip_searchorder *zso;
270
271 subname = get_subname(fullname);
272
273 len = make_filename(PyString_AsString(self->prefix), subname, path);
274 if (len < 0)
275 return MI_ERROR;
276
277 for (zso = zip_searchorder; *zso->suffix; zso++) {
278 strcpy(path + len, zso->suffix);
279 if (PyDict_GetItemString(self->files, path) != NULL) {
280 if (zso->type & IS_PACKAGE)
281 return MI_PACKAGE;
282 else
283 return MI_MODULE;
284 }
285 }
286 return MI_NOT_FOUND;
287}
288
289/* Check whether we can satisfy the import of the module named by
290 'fullname'. Return self if we can, None if we can't. */
291static PyObject *
292zipimporter_find_module(PyObject *obj, PyObject *args)
293{
294 ZipImporter *self = (ZipImporter *)obj;
295 PyObject *path = NULL;
296 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000297 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000298
299 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
300 &fullname, &path))
301 return NULL;
302
303 mi = get_module_info(self, fullname);
304 if (mi == MI_ERROR)
305 return NULL;
306 if (mi == MI_NOT_FOUND) {
307 Py_INCREF(Py_None);
308 return Py_None;
309 }
310 Py_INCREF(self);
311 return (PyObject *)self;
312}
313
314/* Load and return the module named by 'fullname'. */
315static PyObject *
316zipimporter_load_module(PyObject *obj, PyObject *args)
317{
318 ZipImporter *self = (ZipImporter *)obj;
319 PyObject *code, *mod, *dict;
320 char *fullname, *modpath;
321 int ispackage;
322
323 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
324 &fullname))
325 return NULL;
326
327 code = get_module_code(self, fullname, &ispackage, &modpath);
328 if (code == NULL)
329 return NULL;
330
331 mod = PyImport_AddModule(fullname);
332 if (mod == NULL) {
333 Py_DECREF(code);
334 return NULL;
335 }
336 dict = PyModule_GetDict(mod);
337
338 /* mod.__loader__ = self */
339 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
340 goto error;
341
342 if (ispackage) {
343 /* add __path__ to the module *before* the code gets
344 executed */
345 PyObject *pkgpath, *fullpath;
346 char *prefix = PyString_AsString(self->prefix);
347 char *subname = get_subname(fullname);
348 int err;
349
350 fullpath = PyString_FromFormat("%s%c%s%s",
351 PyString_AsString(self->archive),
352 SEP,
353 *prefix ? prefix : "",
354 subname);
355 if (fullpath == NULL)
356 goto error;
357
358 pkgpath = Py_BuildValue("[O]", fullpath);
359 Py_DECREF(fullpath);
360 if (pkgpath == NULL)
361 goto error;
362 err = PyDict_SetItemString(dict, "__path__", pkgpath);
363 Py_DECREF(pkgpath);
364 if (err != 0)
365 goto error;
366 }
367 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
368 Py_DECREF(code);
369 if (Py_VerboseFlag)
370 PySys_WriteStderr("import %s # loaded from Zip %s\n",
371 fullname, modpath);
372 return mod;
373error:
374 Py_DECREF(code);
375 Py_DECREF(mod);
376 return NULL;
377}
378
379/* Return a bool signifying whether the module is a package or not. */
380static PyObject *
381zipimporter_is_package(PyObject *obj, PyObject *args)
382{
383 ZipImporter *self = (ZipImporter *)obj;
384 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000385 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000387 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000388 &fullname))
389 return NULL;
390
391 mi = get_module_info(self, fullname);
392 if (mi == MI_ERROR)
393 return NULL;
394 if (mi == MI_NOT_FOUND) {
395 PyErr_Format(ZipImportError, "can't find module '%.200s'",
396 fullname);
397 return NULL;
398 }
399 return PyBool_FromLong(mi == MI_PACKAGE);
400}
401
402static PyObject *
403zipimporter_get_data(PyObject *obj, PyObject *args)
404{
405 ZipImporter *self = (ZipImporter *)obj;
406 char *path;
407#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000408 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000409#endif
410 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000411 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000412
413 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
414 return NULL;
415
416#ifdef ALTSEP
417 if (strlen(path) >= MAXPATHLEN) {
418 PyErr_SetString(ZipImportError, "path too long");
419 return NULL;
420 }
421 strcpy(buf, path);
422 for (p = buf; *p; p++) {
423 if (*p == ALTSEP)
424 *p = SEP;
425 }
426 path = buf;
427#endif
428 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000429 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000430 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
431 path[len] == SEP) {
432 path = path + len + 1;
433 }
434
435 toc_entry = PyDict_GetItemString(self->files, path);
436 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000437 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000438 return NULL;
439 }
440 return get_data(PyString_AsString(self->archive), toc_entry);
441}
442
443static PyObject *
444zipimporter_get_code(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 char *fullname;
448
449 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
450 return NULL;
451
452 return get_module_code(self, fullname, NULL, NULL);
453}
454
455static PyObject *
456zipimporter_get_source(PyObject *obj, PyObject *args)
457{
458 ZipImporter *self = (ZipImporter *)obj;
459 PyObject *toc_entry;
460 char *fullname, *subname, path[MAXPATHLEN+1];
461 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000462 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000463
464 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
465 return NULL;
466
467 mi = get_module_info(self, fullname);
468 if (mi == MI_ERROR)
469 return NULL;
470 if (mi == MI_NOT_FOUND) {
471 PyErr_Format(ZipImportError, "can't find module '%.200s'",
472 fullname);
473 return NULL;
474 }
475 subname = get_subname(fullname);
476
477 len = make_filename(PyString_AsString(self->prefix), subname, path);
478 if (len < 0)
479 return NULL;
480
481 if (mi == MI_PACKAGE) {
482 path[len] = SEP;
483 strcpy(path + len + 1, "__init__.py");
484 }
485 else
486 strcpy(path + len, ".py");
487
488 toc_entry = PyDict_GetItemString(self->files, path);
489 if (toc_entry != NULL)
490 return get_data(PyString_AsString(self->archive), toc_entry);
491
492 /* we have the module, but no source */
493 Py_INCREF(Py_None);
494 return Py_None;
495}
496
497PyDoc_STRVAR(doc_find_module,
498"find_module(fullname, path=None) -> self or None.\n\
499\n\
500Search for a module specified by 'fullname'. 'fullname' must be the\n\
501fully qualified (dotted) module name. It returns the zipimporter\n\
502instance itself if the module was found, or None if it wasn't.\n\
503The optional 'path' argument is ignored -- it's there for compatibility\n\
504with the importer protocol.");
505
506PyDoc_STRVAR(doc_load_module,
507"load_module(fullname) -> module.\n\
508\n\
509Load the module specified by 'fullname'. 'fullname' must be the\n\
510fully qualified (dotted) module name. It returns the imported\n\
511module, or raises ZipImportError if it wasn't found.");
512
513PyDoc_STRVAR(doc_get_data,
514"get_data(pathname) -> string with file data.\n\
515\n\
516Return the data associated with 'pathname'. Raise IOError if\n\
517the file wasn't found.");
518
519PyDoc_STRVAR(doc_is_package,
520"is_package(fullname) -> bool.\n\
521\n\
522Return True if the module specified by fullname is a package.\n\
523Raise ZipImportError is the module couldn't be found.");
524
525PyDoc_STRVAR(doc_get_code,
526"get_code(fullname) -> code object.\n\
527\n\
528Return the code object for the specified module. Raise ZipImportError\n\
529is the module couldn't be found.");
530
531PyDoc_STRVAR(doc_get_source,
532"get_source(fullname) -> source string.\n\
533\n\
534Return the source code for the specified module. Raise ZipImportError\n\
535is the module couldn't be found, return None if the archive does\n\
536contain the module, but has no source for it.");
537
538static PyMethodDef zipimporter_methods[] = {
539 {"find_module", zipimporter_find_module, METH_VARARGS,
540 doc_find_module},
541 {"load_module", zipimporter_load_module, METH_VARARGS,
542 doc_load_module},
543 {"get_data", zipimporter_get_data, METH_VARARGS,
544 doc_get_data},
545 {"get_code", zipimporter_get_code, METH_VARARGS,
546 doc_get_code},
547 {"get_source", zipimporter_get_source, METH_VARARGS,
548 doc_get_source},
549 {"is_package", zipimporter_is_package, METH_VARARGS,
550 doc_is_package},
551 {NULL, NULL} /* sentinel */
552};
553
554static PyMemberDef zipimporter_members[] = {
555 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
556 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
557 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
558 {NULL}
559};
560
561PyDoc_STRVAR(zipimporter_doc,
562"zipimporter(archivepath) -> zipimporter object\n\
563\n\
564Create a new zipimporter instance. 'archivepath' must be a path to\n\
565a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
566a valid Zip archive.");
567
568#define DEFERRED_ADDRESS(ADDR) 0
569
570static PyTypeObject ZipImporter_Type = {
571 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
572 0,
573 "zipimport.zipimporter",
574 sizeof(ZipImporter),
575 0, /* tp_itemsize */
576 (destructor)zipimporter_dealloc, /* tp_dealloc */
577 0, /* tp_print */
578 0, /* tp_getattr */
579 0, /* tp_setattr */
580 0, /* tp_compare */
581 (reprfunc)zipimporter_repr, /* tp_repr */
582 0, /* tp_as_number */
583 0, /* tp_as_sequence */
584 0, /* tp_as_mapping */
585 0, /* tp_hash */
586 0, /* tp_call */
587 0, /* tp_str */
588 PyObject_GenericGetAttr, /* tp_getattro */
589 0, /* tp_setattro */
590 0, /* tp_as_buffer */
591 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
592 Py_TPFLAGS_HAVE_GC, /* tp_flags */
593 zipimporter_doc, /* tp_doc */
594 zipimporter_traverse, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /* tp_weaklistoffset */
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 zipimporter_methods, /* tp_methods */
601 zipimporter_members, /* tp_members */
602 0, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)zipimporter_init, /* tp_init */
609 PyType_GenericAlloc, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611 PyObject_GC_Del, /* tp_free */
612};
613
614
615/* implementation */
616
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value. This partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic so that a set top bit
   never shifts into the sign bit of a (possibly 32-bit) signed long,
   and the sign extension is done arithmetically so the result is the
   same whatever the width of long is. */
static long
get_long(unsigned char *buf) {
    unsigned long u;

    u = (unsigned long)buf[0];
    u |= (unsigned long)buf[1] << 8;
    u |= (unsigned long)buf[2] << 16;
    u |= (unsigned long)buf[3] << 24;

    if (u & 0x80000000UL) {
        /* negative in 32-bit two's complement: value is u - 2**32,
           computed without leaving the range of long */
        return -(long)(0xFFFFFFFFUL - u) - 1;
    }
    return (long)u;
}
633
634/*
635 read_directory(archive) -> files dict (new reference)
636
637 Given a path to a Zip archive, build a dict, mapping file names
638 (local to the archive, using SEP as a separator) to toc entries.
639
640 A toc_entry is a tuple:
641
Fred Drakef5b7fd22005-11-11 19:34:56 +0000642 (__file__, # value to use for __file__, available for all files
643 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000644 data_size, # size of compressed data on disk
645 file_size, # size of decompressed data
646 file_offset, # offset of file header from start of archive
647 time, # mod time of file (in dos format)
648 date, # mod data of file (in dos format)
649 crc, # crc checksum of the data
650 )
651
652 Directories can be recognized by the trailing SEP in the name,
653 data_size and file_offset are 0.
654*/
655static PyObject *
656read_directory(char *archive)
657{
658 PyObject *files = NULL;
659 FILE *fp;
660 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000661 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000662 long i, l, count;
663 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000664 char path[MAXPATHLEN + 5];
665 char name[MAXPATHLEN + 5];
666 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000667 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000668
669 if (strlen(archive) > MAXPATHLEN) {
670 PyErr_SetString(PyExc_OverflowError,
671 "Zip path name is too long");
672 return NULL;
673 }
674 strcpy(path, archive);
675
676 fp = fopen(archive, "rb");
677 if (fp == NULL) {
678 PyErr_Format(ZipImportError, "can't open Zip file: "
679 "'%.200s'", archive);
680 return NULL;
681 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000682 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000683 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000684 if (fread(endof_central_dir, 1, 22, fp) != 22) {
685 fclose(fp);
686 PyErr_Format(ZipImportError, "can't read Zip file: "
687 "'%.200s'", archive);
688 return NULL;
689 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000690 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000691 /* Bad: End of Central Dir signature */
692 fclose(fp);
693 PyErr_Format(ZipImportError, "not a Zip file: "
694 "'%.200s'", archive);
695 return NULL;
696 }
697
Thomas Heller354e3d92003-07-22 18:10:15 +0000698 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000699 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000700 arc_offset = header_position - header_offset - header_size;
701 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000702
703 files = PyDict_New();
704 if (files == NULL)
705 goto error;
706
707 length = (long)strlen(path);
708 path[length] = SEP;
709
710 /* Start of Central Directory */
711 count = 0;
712 for (;;) {
713 PyObject *t;
714 int err;
715
716 fseek(fp, header_offset, 0); /* Start of file header */
717 l = PyMarshal_ReadLongFromFile(fp);
718 if (l != 0x02014B50)
719 break; /* Bad: Central Dir File Header */
720 fseek(fp, header_offset + 10, 0);
721 compress = PyMarshal_ReadShortFromFile(fp);
722 time = PyMarshal_ReadShortFromFile(fp);
723 date = PyMarshal_ReadShortFromFile(fp);
724 crc = PyMarshal_ReadLongFromFile(fp);
725 data_size = PyMarshal_ReadLongFromFile(fp);
726 file_size = PyMarshal_ReadLongFromFile(fp);
727 name_size = PyMarshal_ReadShortFromFile(fp);
728 header_size = 46 + name_size +
729 PyMarshal_ReadShortFromFile(fp) +
730 PyMarshal_ReadShortFromFile(fp);
731 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000732 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000733 if (name_size > MAXPATHLEN)
734 name_size = MAXPATHLEN;
735
736 p = name;
737 for (i = 0; i < name_size; i++) {
738 *p = (char)getc(fp);
739 if (*p == '/')
740 *p = SEP;
741 p++;
742 }
743 *p = 0; /* Add terminating null byte */
744 header_offset += header_size;
745
746 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
747
748 t = Py_BuildValue("siiiiiii", path, compress, data_size,
749 file_size, file_offset, time, date, crc);
750 if (t == NULL)
751 goto error;
752 err = PyDict_SetItemString(files, name, t);
753 Py_DECREF(t);
754 if (err != 0)
755 goto error;
756 count++;
757 }
758 fclose(fp);
759 if (Py_VerboseFlag)
760 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
761 count, archive);
762 return files;
763error:
764 fclose(fp);
765 Py_XDECREF(files);
766 return NULL;
767}
768
769/* Return the zlib.decompress function object, or NULL if zlib couldn't
770 be imported. The function is cached when found, so subsequent calls
771 don't import zlib again. Returns a *borrowed* reference.
772 XXX This makes zlib.decompress immortal. */
773static PyObject *
774get_decompress_func(void)
775{
776 static PyObject *decompress = NULL;
777
778 if (decompress == NULL) {
779 PyObject *zlib;
780 static int importing_zlib = 0;
781
782 if (importing_zlib != 0)
783 /* Someone has a zlib.py[co] in their Zip file;
784 let's avoid a stack overflow. */
785 return NULL;
786 importing_zlib = 1;
787 zlib = PyImport_ImportModule("zlib"); /* import zlib */
788 importing_zlib = 0;
789 if (zlib != NULL) {
790 decompress = PyObject_GetAttrString(zlib,
791 "decompress");
792 Py_DECREF(zlib);
793 }
794 else
795 PyErr_Clear();
796 if (Py_VerboseFlag)
797 PySys_WriteStderr("# zipimport: zlib %s\n",
798 zlib != NULL ? "available": "UNAVAILABLE");
799 }
800 return decompress;
801}
802
803/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
804 data as a new reference. */
805static PyObject *
806get_data(char *archive, PyObject *toc_entry)
807{
808 PyObject *raw_data, *data = NULL, *decompress;
809 char *buf;
810 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000811 int err;
812 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000813 long l;
814 char *datapath;
815 long compress, data_size, file_size, file_offset;
816 long time, date, crc;
817
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000818 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000819 &data_size, &file_size, &file_offset, &time,
820 &date, &crc)) {
821 return NULL;
822 }
823
824 fp = fopen(archive, "rb");
825 if (!fp) {
826 PyErr_Format(PyExc_IOError,
827 "zipimport: can not open file %s", archive);
828 return NULL;
829 }
830
831 /* Check to make sure the local file header is correct */
832 fseek(fp, file_offset, 0);
833 l = PyMarshal_ReadLongFromFile(fp);
834 if (l != 0x04034B50) {
835 /* Bad: Local File Header */
836 PyErr_Format(ZipImportError,
837 "bad local file header in %s",
838 archive);
839 fclose(fp);
840 return NULL;
841 }
842 fseek(fp, file_offset + 26, 0);
843 l = 30 + PyMarshal_ReadShortFromFile(fp) +
844 PyMarshal_ReadShortFromFile(fp); /* local header size */
845 file_offset += l; /* Start of file data */
846
847 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
848 data_size : data_size + 1);
849 if (raw_data == NULL) {
850 fclose(fp);
851 return NULL;
852 }
853 buf = PyString_AsString(raw_data);
854
855 err = fseek(fp, file_offset, 0);
856 if (err == 0)
857 bytes_read = fread(buf, 1, data_size, fp);
858 fclose(fp);
859 if (err || bytes_read != data_size) {
860 PyErr_SetString(PyExc_IOError,
861 "zipimport: can't read data");
862 Py_DECREF(raw_data);
863 return NULL;
864 }
865
866 if (compress != 0) {
867 buf[data_size] = 'Z'; /* saw this in zipfile.py */
868 data_size++;
869 }
870 buf[data_size] = '\0';
871
872 if (compress == 0) /* data is not compressed */
873 return raw_data;
874
875 /* Decompress with zlib */
876 decompress = get_decompress_func();
877 if (decompress == NULL) {
878 PyErr_SetString(ZipImportError,
879 "can't decompress data; "
880 "zlib not available");
881 goto error;
882 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000883 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000884error:
885 Py_DECREF(raw_data);
886 return data;
887}
888
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc (dostime
   only stores even seconds), so two stamps count as equal when they
   differ by at most one second.  Returns non-zero for "equal". */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    if (delta < 0)
        delta = -delta;
    return (delta > 1) ? 0 : 1;
}
901
902/* Given the contents of a .py[co] file in a buffer, unmarshal the data
903 and return the code object. Return None if it the magic word doesn't
904 match (we do this instead of raising an exception as we fall back
905 to .py if available and we don't want to mask other errors).
906 Returns a new reference. */
907static PyObject *
908unmarshal_code(char *pathname, PyObject *data, time_t mtime)
909{
910 PyObject *code;
911 char *buf = PyString_AsString(data);
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000912 Py_ssize_t size = PyString_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000913
914 if (size <= 9) {
915 PyErr_SetString(ZipImportError,
916 "bad pyc data");
917 return NULL;
918 }
919
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000920 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000921 if (Py_VerboseFlag)
922 PySys_WriteStderr("# %s has bad magic\n",
923 pathname);
924 Py_INCREF(Py_None);
925 return Py_None; /* signal caller to try alternative */
926 }
927
Just van Rossum9a3129c2003-01-03 11:18:56 +0000928 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
929 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000930 if (Py_VerboseFlag)
931 PySys_WriteStderr("# %s has bad mtime\n",
932 pathname);
933 Py_INCREF(Py_None);
934 return Py_None; /* signal caller to try alternative */
935 }
936
937 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
938 if (code == NULL)
939 return NULL;
940 if (!PyCode_Check(code)) {
941 Py_DECREF(code);
942 PyErr_Format(PyExc_TypeError,
943 "compiled module %.200s is not a code object",
944 pathname);
945 return NULL;
946 }
947 return code;
948}
949
950/* Replace any occurances of "\r\n?" in the input string with "\n".
951 This converts DOS and Mac line endings to Unix line endings.
952 Also append a trailing "\n" to be compatible with
953 PyParser_SimpleParseFile(). Returns a new reference. */
954static PyObject *
955normalize_line_endings(PyObject *source)
956{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000957 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000958 PyObject *fixed_source;
959
Just van Rossum9a3129c2003-01-03 11:18:56 +0000960 /* one char extra for trailing \n and one for terminating \0 */
961 buf = PyMem_Malloc(PyString_Size(source) + 2);
962 if (buf == NULL) {
963 PyErr_SetString(PyExc_MemoryError,
964 "zipimport: no memory to allocate "
965 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000966 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000967 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000969 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000970 if (*p == '\r') {
971 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000972 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000974 }
975 else
976 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000977 }
978 *q++ = '\n'; /* add trailing \n */
979 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000980 fixed_source = PyString_FromString(buf);
981 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000982 return fixed_source;
983}
984
985/* Given a string buffer containing Python source code, compile it
986 return and return a code object as a new reference. */
987static PyObject *
988compile_source(char *pathname, PyObject *source)
989{
990 PyObject *code, *fixed_source;
991
992 fixed_source = normalize_line_endings(source);
993 if (fixed_source == NULL)
994 return NULL;
995
996 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
997 Py_file_input);
998 Py_DECREF(fixed_source);
999 return code;
1000}
1001
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   DOS time: bits 0-4 seconds/2, bits 5-10 minutes, bits 11-15 hours.
   DOS date: bits 0-4 day, bits 5-8 month (1-12), bits 9-15 years
   since 1980.  The result is the local-time interpretation produced
   by mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* Zero the whole structure first: struct tm may carry members
       beyond the ones assigned below, and they must not be handed to
       mktime() uninitialized. */
    struct tm stm = {0};

    stm.tm_sec   =  (dostime        & 0x1f) * 2;
    stm.tm_min   =  (dostime >> 5)  & 0x3f;
    stm.tm_hour  =  (dostime >> 11) & 0x1f;
    stm.tm_mday  =   dosdate        & 0x1f;
    stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1019
1020/* Given a path to a .pyc or .pyo file in the archive, return the
1021 modifictaion time of the matching .py file, or 0 if no source
1022 is available. */
1023static time_t
1024get_mtime_of_source(ZipImporter *self, char *path)
1025{
1026 PyObject *toc_entry;
1027 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001028 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001029 char savechar = path[lastchar];
1030 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1031 toc_entry = PyDict_GetItemString(self->files, path);
1032 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1033 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001034 /* fetch the time stamp of the .py file for comparison
1035 with an embedded pyc time stamp */
1036 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001037 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1038 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1039 mtime = parse_dostime(time, date);
1040 }
1041 path[lastchar] = savechar;
1042 return mtime;
1043}
1044
1045/* Return the code object for the module named by 'fullname' from the
1046 Zip archive as a new reference. */
1047static PyObject *
1048get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1049 time_t mtime, PyObject *toc_entry)
1050{
1051 PyObject *data, *code;
1052 char *modpath;
1053 char *archive = PyString_AsString(self->archive);
1054
1055 if (archive == NULL)
1056 return NULL;
1057
1058 data = get_data(archive, toc_entry);
1059 if (data == NULL)
1060 return NULL;
1061
1062 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1063
1064 if (isbytecode) {
1065 code = unmarshal_code(modpath, data, mtime);
1066 }
1067 else {
1068 code = compile_source(modpath, data);
1069 }
1070 Py_DECREF(data);
1071 return code;
1072}
1073
1074/* Get the code object assoiciated with the module specified by
1075 'fullname'. */
1076static PyObject *
1077get_module_code(ZipImporter *self, char *fullname,
1078 int *p_ispackage, char **p_modpath)
1079{
1080 PyObject *toc_entry;
1081 char *subname, path[MAXPATHLEN + 1];
1082 int len;
1083 struct st_zip_searchorder *zso;
1084
1085 subname = get_subname(fullname);
1086
1087 len = make_filename(PyString_AsString(self->prefix), subname, path);
1088 if (len < 0)
1089 return NULL;
1090
1091 for (zso = zip_searchorder; *zso->suffix; zso++) {
1092 PyObject *code = NULL;
1093
1094 strcpy(path + len, zso->suffix);
1095 if (Py_VerboseFlag > 1)
1096 PySys_WriteStderr("# trying %s%c%s\n",
1097 PyString_AsString(self->archive),
1098 SEP, path);
1099 toc_entry = PyDict_GetItemString(self->files, path);
1100 if (toc_entry != NULL) {
1101 time_t mtime = 0;
1102 int ispackage = zso->type & IS_PACKAGE;
1103 int isbytecode = zso->type & IS_BYTECODE;
1104
1105 if (isbytecode)
1106 mtime = get_mtime_of_source(self, path);
1107 if (p_ispackage != NULL)
1108 *p_ispackage = ispackage;
1109 code = get_code_from_data(self, ispackage,
1110 isbytecode, mtime,
1111 toc_entry);
1112 if (code == Py_None) {
1113 /* bad magic number or non-matching mtime
1114 in byte code, try next */
1115 Py_DECREF(code);
1116 continue;
1117 }
1118 if (code != NULL && p_modpath != NULL)
1119 *p_modpath = PyString_AsString(
1120 PyTuple_GetItem(toc_entry, 0));
1121 return code;
1122 }
1123 }
1124 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1125 return NULL;
1126}
1127
1128
1129/* Module init */
1130
/* Module docstring for zipimport.  initzipimport() passes it to
   Py_InitModule4() as the documentation string of the new module. */
1131PyDoc_STRVAR(zipimport_doc,
1132"zipimport provides support for importing Python modules from Zip archives.\n\
1133\n\
1134This module exports three objects:\n\
1135- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001136- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001137 subclass of ImportError, so it can be caught as ImportError, too.\n\
1138- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1139 info dicts, as used in zipimporter._files.\n\
1140\n\
1141It is usually not needed to use the zipimport module explicitly; it is\n\
1142used by the builtin import mechanism for sys.path items that are paths\n\
1143to Zip archives.");
1144
1145PyMODINIT_FUNC
1146initzipimport(void)
1147{
1148 PyObject *mod;
1149
1150 if (PyType_Ready(&ZipImporter_Type) < 0)
1151 return;
1152
1153 /* Correct directory separator */
1154 zip_searchorder[0].suffix[0] = SEP;
1155 zip_searchorder[1].suffix[0] = SEP;
1156 zip_searchorder[2].suffix[0] = SEP;
1157 if (Py_OptimizeFlag) {
1158 /* Reverse *.pyc and *.pyo */
1159 struct st_zip_searchorder tmp;
1160 tmp = zip_searchorder[0];
1161 zip_searchorder[0] = zip_searchorder[1];
1162 zip_searchorder[1] = tmp;
1163 tmp = zip_searchorder[3];
1164 zip_searchorder[3] = zip_searchorder[4];
1165 zip_searchorder[4] = tmp;
1166 }
1167
1168 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1169 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001170 if (mod == NULL)
1171 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001172
1173 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1174 PyExc_ImportError, NULL);
1175 if (ZipImportError == NULL)
1176 return;
1177
1178 Py_INCREF(ZipImportError);
1179 if (PyModule_AddObject(mod, "ZipImportError",
1180 ZipImportError) < 0)
1181 return;
1182
1183 Py_INCREF(&ZipImporter_Type);
1184 if (PyModule_AddObject(mod, "zipimporter",
1185 (PyObject *)&ZipImporter_Type) < 0)
1186 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001187
Just van Rossum52e14d62002-12-30 22:08:05 +00001188 zip_directory_cache = PyDict_New();
1189 if (zip_directory_cache == NULL)
1190 return;
1191 Py_INCREF(zip_directory_cache);
1192 if (PyModule_AddObject(mod, "_zip_directory_cache",
1193 zip_directory_cache) < 0)
1194 return;
1195}