blob: 937b0f733a9f566f01c33e3eb1af84decf4878b0 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flag bits describing the kind of archive entry a search-order suffix
   stands for.  IS_SOURCE is 0, i.e. the absence of IS_BYTECODE. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One entry of the module search order: a file-name suffix together
   with the IS_* flags that apply to a file found under that suffix. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is walked until an entry with an empty suffix is reached. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel: empty suffix terminates the search loops */
};
31
32/* zipimporter object definition and support */
33
typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter object.  The three object fields
   are released with Py_XDECREF in zipimporter_dealloc, so any of them
   may still be NULL if initialization did not complete. */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
42
/* The type object is defined further down; declared here so the
   helpers and the check macro below can refer to it. */
static PyTypeObject ZipImporter_Type;
/* Exception raised for archive- and module-lookup failures. */
static PyObject *ZipImportError;
/* Dict keyed by archive path holding the "files" dict produced by
   read_directory(); consulted and filled by zipimporter_init(). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if op is a zipimporter instance (or an instance of a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
55
56
57/* zipimporter.__init__
58 Split the "subdirectory" from the Zip archive path, lookup a matching
59 entry in sys.path_importer_cache, fetch the file directory from there
60 if found, or else read it from the archive. */
61static int
62zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63{
64 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000065 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000066
Georg Brandl02c42872005-08-26 06:42:30 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
69
Just van Rossum52e14d62002-12-30 22:08:05 +000070 if (!PyArg_ParseTuple(args, "s:zipimporter",
71 &path))
72 return -1;
73
74 len = strlen(path);
75 if (len == 0) {
76 PyErr_SetString(ZipImportError, "archive path is empty");
77 return -1;
78 }
79 if (len >= MAXPATHLEN) {
80 PyErr_SetString(ZipImportError,
81 "archive path too long");
82 return -1;
83 }
84 strcpy(buf, path);
85
86#ifdef ALTSEP
87 for (p = buf; *p; p++) {
88 if (*p == ALTSEP)
89 *p = SEP;
90 }
91#endif
92
93 path = NULL;
94 prefix = NULL;
95 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000096#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000097 struct stat statbuf;
98 int rv;
99
100 rv = stat(buf, &statbuf);
101 if (rv == 0) {
102 /* it exists */
103 if (S_ISREG(statbuf.st_mode))
104 /* it's a file */
105 path = buf;
106 break;
107 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000108#else
109 if (object_exists(buf)) {
110 /* it exists */
111 if (isfile(buf))
112 /* it's a file */
113 path = buf;
114 break;
115 }
116#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000118 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
124 prefix = p;
125 }
126 if (path != NULL) {
127 PyObject *files;
128 files = PyDict_GetItemString(zip_directory_cache, path);
129 if (files == NULL) {
130 files = read_directory(buf);
131 if (files == NULL)
132 return -1;
133 if (PyDict_SetItemString(zip_directory_cache, path,
134 files) != 0)
135 return -1;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140 }
141 else {
142 PyErr_SetString(ZipImportError, "not a Zip file");
143 return -1;
144 }
145
146 if (prefix == NULL)
147 prefix = "";
148 else {
149 prefix++;
150 len = strlen(prefix);
151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
155 }
156 }
157
158 self->archive = PyString_FromString(buf);
159 if (self->archive == NULL)
160 return -1;
161
162 self->prefix = PyString_FromString(prefix);
163 if (self->prefix == NULL)
164 return -1;
165
166 return 0;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173 ZipImporter *self = (ZipImporter *)obj;
174 int err;
175
176 if (self->files != NULL) {
177 err = visit(self->files, arg);
178 if (err)
179 return err;
180 }
181 return 0;
182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000189 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 Py_XDECREF(self->files);
191 self->ob_type->tp_free((PyObject *)self);
192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
197 char buf[500];
198 char *archive = "???";
199 char *prefix = "";
200
201 if (self->archive != NULL && PyString_Check(self->archive))
202 archive = PyString_AsString(self->archive);
203 if (self->prefix != NULL && PyString_Check(self->prefix))
204 prefix = PyString_AsString(self->prefix);
205 if (prefix != NULL && *prefix)
206 PyOS_snprintf(buf, sizeof(buf),
207 "<zipimporter object \"%.300s%c%.150s\">",
208 archive, SEP, prefix);
209 else
210 PyOS_snprintf(buf, sizeof(buf),
211 "<zipimporter object \"%.300s\">",
212 archive);
213 return PyString_FromString(buf);
214}
215
/* return fullname.split(".")[-1]
   The result points into 'fullname' itself: just past the last '.',
   or at the start when the name contains no dot. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return last_dot != NULL ? last_dot + 1 : fullname;
}
227
228/* Given a (sub)modulename, write the potential file path in the
229 archive (without extension) to the path buffer. Return the
230 length of the resulting string. */
231static int
232make_filename(char *prefix, char *name, char *path)
233{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000234 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000235 char *p;
236
237 len = strlen(prefix);
238
239 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
240 if (len + strlen(name) + 13 >= MAXPATHLEN) {
241 PyErr_SetString(ZipImportError, "path too long");
242 return -1;
243 }
244
245 strcpy(path, prefix);
246 strcpy(path + len, name);
247 for (p = path + len; *p; p++) {
248 if (*p == '.')
249 *p = SEP;
250 }
251 len += strlen(name);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000252 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000253}
254
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (an __init__ entry) */
};
261
262/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000263static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000264get_module_info(ZipImporter *self, char *fullname)
265{
266 char *subname, path[MAXPATHLEN + 1];
267 int len;
268 struct st_zip_searchorder *zso;
269
270 subname = get_subname(fullname);
271
272 len = make_filename(PyString_AsString(self->prefix), subname, path);
273 if (len < 0)
274 return MI_ERROR;
275
276 for (zso = zip_searchorder; *zso->suffix; zso++) {
277 strcpy(path + len, zso->suffix);
278 if (PyDict_GetItemString(self->files, path) != NULL) {
279 if (zso->type & IS_PACKAGE)
280 return MI_PACKAGE;
281 else
282 return MI_MODULE;
283 }
284 }
285 return MI_NOT_FOUND;
286}
287
288/* Check whether we can satisfy the import of the module named by
289 'fullname'. Return self if we can, None if we can't. */
290static PyObject *
291zipimporter_find_module(PyObject *obj, PyObject *args)
292{
293 ZipImporter *self = (ZipImporter *)obj;
294 PyObject *path = NULL;
295 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000296 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297
298 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
299 &fullname, &path))
300 return NULL;
301
302 mi = get_module_info(self, fullname);
303 if (mi == MI_ERROR)
304 return NULL;
305 if (mi == MI_NOT_FOUND) {
306 Py_INCREF(Py_None);
307 return Py_None;
308 }
309 Py_INCREF(self);
310 return (PyObject *)self;
311}
312
313/* Load and return the module named by 'fullname'. */
314static PyObject *
315zipimporter_load_module(PyObject *obj, PyObject *args)
316{
317 ZipImporter *self = (ZipImporter *)obj;
318 PyObject *code, *mod, *dict;
319 char *fullname, *modpath;
320 int ispackage;
321
322 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
323 &fullname))
324 return NULL;
325
326 code = get_module_code(self, fullname, &ispackage, &modpath);
327 if (code == NULL)
328 return NULL;
329
330 mod = PyImport_AddModule(fullname);
331 if (mod == NULL) {
332 Py_DECREF(code);
333 return NULL;
334 }
335 dict = PyModule_GetDict(mod);
336
337 /* mod.__loader__ = self */
338 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
339 goto error;
340
341 if (ispackage) {
342 /* add __path__ to the module *before* the code gets
343 executed */
344 PyObject *pkgpath, *fullpath;
345 char *prefix = PyString_AsString(self->prefix);
346 char *subname = get_subname(fullname);
347 int err;
348
349 fullpath = PyString_FromFormat("%s%c%s%s",
350 PyString_AsString(self->archive),
351 SEP,
352 *prefix ? prefix : "",
353 subname);
354 if (fullpath == NULL)
355 goto error;
356
357 pkgpath = Py_BuildValue("[O]", fullpath);
358 Py_DECREF(fullpath);
359 if (pkgpath == NULL)
360 goto error;
361 err = PyDict_SetItemString(dict, "__path__", pkgpath);
362 Py_DECREF(pkgpath);
363 if (err != 0)
364 goto error;
365 }
366 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
367 Py_DECREF(code);
368 if (Py_VerboseFlag)
369 PySys_WriteStderr("import %s # loaded from Zip %s\n",
370 fullname, modpath);
371 return mod;
372error:
373 Py_DECREF(code);
374 Py_DECREF(mod);
375 return NULL;
376}
377
378/* Return a bool signifying whether the module is a package or not. */
379static PyObject *
380zipimporter_is_package(PyObject *obj, PyObject *args)
381{
382 ZipImporter *self = (ZipImporter *)obj;
383 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000384 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000385
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000386 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000387 &fullname))
388 return NULL;
389
390 mi = get_module_info(self, fullname);
391 if (mi == MI_ERROR)
392 return NULL;
393 if (mi == MI_NOT_FOUND) {
394 PyErr_Format(ZipImportError, "can't find module '%.200s'",
395 fullname);
396 return NULL;
397 }
398 return PyBool_FromLong(mi == MI_PACKAGE);
399}
400
401static PyObject *
402zipimporter_get_data(PyObject *obj, PyObject *args)
403{
404 ZipImporter *self = (ZipImporter *)obj;
405 char *path;
406#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000407 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000408#endif
409 PyObject *toc_entry;
410 int len;
411
412 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413 return NULL;
414
415#ifdef ALTSEP
416 if (strlen(path) >= MAXPATHLEN) {
417 PyErr_SetString(ZipImportError, "path too long");
418 return NULL;
419 }
420 strcpy(buf, path);
421 for (p = buf; *p; p++) {
422 if (*p == ALTSEP)
423 *p = SEP;
424 }
425 path = buf;
426#endif
427 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000428 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000429 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
430 path[len] == SEP) {
431 path = path + len + 1;
432 }
433
434 toc_entry = PyDict_GetItemString(self->files, path);
435 if (toc_entry == NULL) {
436 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
437 path);
438 return NULL;
439 }
440 return get_data(PyString_AsString(self->archive), toc_entry);
441}
442
443static PyObject *
444zipimporter_get_code(PyObject *obj, PyObject *args)
445{
446 ZipImporter *self = (ZipImporter *)obj;
447 char *fullname;
448
449 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
450 return NULL;
451
452 return get_module_code(self, fullname, NULL, NULL);
453}
454
455static PyObject *
456zipimporter_get_source(PyObject *obj, PyObject *args)
457{
458 ZipImporter *self = (ZipImporter *)obj;
459 PyObject *toc_entry;
460 char *fullname, *subname, path[MAXPATHLEN+1];
461 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000462 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000463
464 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
465 return NULL;
466
467 mi = get_module_info(self, fullname);
468 if (mi == MI_ERROR)
469 return NULL;
470 if (mi == MI_NOT_FOUND) {
471 PyErr_Format(ZipImportError, "can't find module '%.200s'",
472 fullname);
473 return NULL;
474 }
475 subname = get_subname(fullname);
476
477 len = make_filename(PyString_AsString(self->prefix), subname, path);
478 if (len < 0)
479 return NULL;
480
481 if (mi == MI_PACKAGE) {
482 path[len] = SEP;
483 strcpy(path + len + 1, "__init__.py");
484 }
485 else
486 strcpy(path + len, ".py");
487
488 toc_entry = PyDict_GetItemString(self->files, path);
489 if (toc_entry != NULL)
490 return get_data(PyString_AsString(self->archive), toc_entry);
491
492 /* we have the module, but no source */
493 Py_INCREF(Py_None);
494 return Py_None;
495}
496
/* Docstrings for the zipimporter methods; each one is referenced from
   the zipimporter_methods table. */
PyDoc_STRVAR(doc_find_module,
"find_module(fullname, path=None) -> self or None.\n\
\n\
Search for a module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the zipimporter\n\
instance itself if the module was found, or None if it wasn't.\n\
The optional 'path' argument is ignored -- it's there for compatibility\n\
with the importer protocol.");

PyDoc_STRVAR(doc_load_module,
"load_module(fullname) -> module.\n\
\n\
Load the module specified by 'fullname'. 'fullname' must be the\n\
fully qualified (dotted) module name. It returns the imported\n\
module, or raises ZipImportError if it wasn't found.");

PyDoc_STRVAR(doc_get_data,
"get_data(pathname) -> string with file data.\n\
\n\
Return the data associated with 'pathname'. Raise IOError if\n\
the file wasn't found.");
518
519PyDoc_STRVAR(doc_is_package,
520"is_package(fullname) -> bool.\n\
521\n\
522Return True if the module specified by fullname is a package.\n\
523Raise ZipImportError is the module couldn't be found.");
524
525PyDoc_STRVAR(doc_get_code,
526"get_code(fullname) -> code object.\n\
527\n\
528Return the code object for the specified module. Raise ZipImportError\n\
529is the module couldn't be found.");
530
531PyDoc_STRVAR(doc_get_source,
532"get_source(fullname) -> source string.\n\
533\n\
534Return the source code for the specified module. Raise ZipImportError\n\
535is the module couldn't be found, return None if the archive does\n\
536contain the module, but has no source for it.");
537
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring.  All methods take
   their arguments as a tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL, NULL}    /* sentinel */
};
553
/* Read-only attributes exposing the three instance fields; the files
   dict is published under the name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
    {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
    {NULL}  /* sentinel */
};
560
/* Docstring of the zipimporter type itself (used as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");
567
/* Expands to 0 whatever its argument: the address named in the
   argument is not stored in the static initializer below.
   NOTE(review): presumably ob_type is filled in during module
   initialization, which is outside this view -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter.  The type is subclassable
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic GC
   (Py_TPFLAGS_HAVE_GC), which is why tp_traverse is set and tp_free
   is PyObject_GC_Del. */
static PyTypeObject ZipImporter_Type = {
    PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
    0,
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                      /* tp_itemsize */
    (destructor)zipimporter_dealloc,        /* tp_dealloc */
    0,                                      /* tp_print */
    0,                                      /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    (reprfunc)zipimporter_repr,             /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    0,                                      /* tp_hash */
    0,                                      /* tp_call */
    0,                                      /* tp_str */
    PyObject_GenericGetAttr,                /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                 /* tp_flags */
    zipimporter_doc,                        /* tp_doc */
    zipimporter_traverse,                   /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    zipimporter_methods,                    /* tp_methods */
    zipimporter_members,                    /* tp_members */
    0,                                      /* tp_getset */
    0,                                      /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    (initproc)zipimporter_init,             /* tp_init */
    PyType_GenericAlloc,                    /* tp_alloc */
    PyType_GenericNew,                      /* tp_new */
    PyObject_GC_Del,                        /* tp_free */
};
613
614
615/* implementation */
616
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian. This partially reimplements
   marshal.c:r_long().

   The four bytes are read as a signed 32-bit two's complement value,
   so a set top bit yields a negative result whatever the width of
   'long'.  The bytes are assembled in an unsigned type: shifting a
   signed long into its sign bit is not defined when long is 32 bits
   wide. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;
    long x;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);

    x = (long)(bits & 0x7FFFFFFFUL);
    if (bits & 0x80000000UL) {
        /* subtract 2**31 in two steps so that no intermediate value
           leaves the range of a 32-bit long */
        x = x - 0x7FFFFFFFL - 1;
    }
    return x;
}
633
634/*
635 read_directory(archive) -> files dict (new reference)
636
637 Given a path to a Zip archive, build a dict, mapping file names
638 (local to the archive, using SEP as a separator) to toc entries.
639
640 A toc_entry is a tuple:
641
Fred Drakef5b7fd22005-11-11 19:34:56 +0000642 (__file__, # value to use for __file__, available for all files
643 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000644 data_size, # size of compressed data on disk
645 file_size, # size of decompressed data
646 file_offset, # offset of file header from start of archive
647 time, # mod time of file (in dos format)
648 date, # mod data of file (in dos format)
649 crc, # crc checksum of the data
650 )
651
652 Directories can be recognized by the trailing SEP in the name,
653 data_size and file_offset are 0.
654*/
655static PyObject *
656read_directory(char *archive)
657{
658 PyObject *files = NULL;
659 FILE *fp;
660 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000661 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000662 long i, l, count;
663 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000664 char path[MAXPATHLEN + 5];
665 char name[MAXPATHLEN + 5];
666 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000667 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000668
669 if (strlen(archive) > MAXPATHLEN) {
670 PyErr_SetString(PyExc_OverflowError,
671 "Zip path name is too long");
672 return NULL;
673 }
674 strcpy(path, archive);
675
676 fp = fopen(archive, "rb");
677 if (fp == NULL) {
678 PyErr_Format(ZipImportError, "can't open Zip file: "
679 "'%.200s'", archive);
680 return NULL;
681 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000682 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000683 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000684 if (fread(endof_central_dir, 1, 22, fp) != 22) {
685 fclose(fp);
686 PyErr_Format(ZipImportError, "can't read Zip file: "
687 "'%.200s'", archive);
688 return NULL;
689 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000690 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000691 /* Bad: End of Central Dir signature */
692 fclose(fp);
693 PyErr_Format(ZipImportError, "not a Zip file: "
694 "'%.200s'", archive);
695 return NULL;
696 }
697
Thomas Heller354e3d92003-07-22 18:10:15 +0000698 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000699 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000700 arc_offset = header_position - header_offset - header_size;
701 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000702
703 files = PyDict_New();
704 if (files == NULL)
705 goto error;
706
707 length = (long)strlen(path);
708 path[length] = SEP;
709
710 /* Start of Central Directory */
711 count = 0;
712 for (;;) {
713 PyObject *t;
714 int err;
715
716 fseek(fp, header_offset, 0); /* Start of file header */
717 l = PyMarshal_ReadLongFromFile(fp);
718 if (l != 0x02014B50)
719 break; /* Bad: Central Dir File Header */
720 fseek(fp, header_offset + 10, 0);
721 compress = PyMarshal_ReadShortFromFile(fp);
722 time = PyMarshal_ReadShortFromFile(fp);
723 date = PyMarshal_ReadShortFromFile(fp);
724 crc = PyMarshal_ReadLongFromFile(fp);
725 data_size = PyMarshal_ReadLongFromFile(fp);
726 file_size = PyMarshal_ReadLongFromFile(fp);
727 name_size = PyMarshal_ReadShortFromFile(fp);
728 header_size = 46 + name_size +
729 PyMarshal_ReadShortFromFile(fp) +
730 PyMarshal_ReadShortFromFile(fp);
731 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000732 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000733 if (name_size > MAXPATHLEN)
734 name_size = MAXPATHLEN;
735
736 p = name;
737 for (i = 0; i < name_size; i++) {
738 *p = (char)getc(fp);
739 if (*p == '/')
740 *p = SEP;
741 p++;
742 }
743 *p = 0; /* Add terminating null byte */
744 header_offset += header_size;
745
746 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
747
748 t = Py_BuildValue("siiiiiii", path, compress, data_size,
749 file_size, file_offset, time, date, crc);
750 if (t == NULL)
751 goto error;
752 err = PyDict_SetItemString(files, name, t);
753 Py_DECREF(t);
754 if (err != 0)
755 goto error;
756 count++;
757 }
758 fclose(fp);
759 if (Py_VerboseFlag)
760 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
761 count, archive);
762 return files;
763error:
764 fclose(fp);
765 Py_XDECREF(files);
766 return NULL;
767}
768
769/* Return the zlib.decompress function object, or NULL if zlib couldn't
770 be imported. The function is cached when found, so subsequent calls
771 don't import zlib again. Returns a *borrowed* reference.
772 XXX This makes zlib.decompress immortal. */
773static PyObject *
774get_decompress_func(void)
775{
776 static PyObject *decompress = NULL;
777
778 if (decompress == NULL) {
779 PyObject *zlib;
780 static int importing_zlib = 0;
781
782 if (importing_zlib != 0)
783 /* Someone has a zlib.py[co] in their Zip file;
784 let's avoid a stack overflow. */
785 return NULL;
786 importing_zlib = 1;
787 zlib = PyImport_ImportModule("zlib"); /* import zlib */
788 importing_zlib = 0;
789 if (zlib != NULL) {
790 decompress = PyObject_GetAttrString(zlib,
791 "decompress");
792 Py_DECREF(zlib);
793 }
794 else
795 PyErr_Clear();
796 if (Py_VerboseFlag)
797 PySys_WriteStderr("# zipimport: zlib %s\n",
798 zlib != NULL ? "available": "UNAVAILABLE");
799 }
800 return decompress;
801}
802
803/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
804 data as a new reference. */
805static PyObject *
806get_data(char *archive, PyObject *toc_entry)
807{
808 PyObject *raw_data, *data = NULL, *decompress;
809 char *buf;
810 FILE *fp;
811 int err, bytes_read = 0;
812 long l;
813 char *datapath;
814 long compress, data_size, file_size, file_offset;
815 long time, date, crc;
816
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000817 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000818 &data_size, &file_size, &file_offset, &time,
819 &date, &crc)) {
820 return NULL;
821 }
822
823 fp = fopen(archive, "rb");
824 if (!fp) {
825 PyErr_Format(PyExc_IOError,
826 "zipimport: can not open file %s", archive);
827 return NULL;
828 }
829
830 /* Check to make sure the local file header is correct */
831 fseek(fp, file_offset, 0);
832 l = PyMarshal_ReadLongFromFile(fp);
833 if (l != 0x04034B50) {
834 /* Bad: Local File Header */
835 PyErr_Format(ZipImportError,
836 "bad local file header in %s",
837 archive);
838 fclose(fp);
839 return NULL;
840 }
841 fseek(fp, file_offset + 26, 0);
842 l = 30 + PyMarshal_ReadShortFromFile(fp) +
843 PyMarshal_ReadShortFromFile(fp); /* local header size */
844 file_offset += l; /* Start of file data */
845
846 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
847 data_size : data_size + 1);
848 if (raw_data == NULL) {
849 fclose(fp);
850 return NULL;
851 }
852 buf = PyString_AsString(raw_data);
853
854 err = fseek(fp, file_offset, 0);
855 if (err == 0)
856 bytes_read = fread(buf, 1, data_size, fp);
857 fclose(fp);
858 if (err || bytes_read != data_size) {
859 PyErr_SetString(PyExc_IOError,
860 "zipimport: can't read data");
861 Py_DECREF(raw_data);
862 return NULL;
863 }
864
865 if (compress != 0) {
866 buf[data_size] = 'Z'; /* saw this in zipfile.py */
867 data_size++;
868 }
869 buf[data_size] = '\0';
870
871 if (compress == 0) /* data is not compressed */
872 return raw_data;
873
874 /* Decompress with zlib */
875 decompress = get_decompress_func();
876 if (decompress == NULL) {
877 PyErr_SetString(ZipImportError,
878 "can't decompress data; "
879 "zlib not available");
880 goto error;
881 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000882 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000883error:
884 Py_DECREF(raw_data);
885 return data;
886}
887
/* Lenient time stamp comparison: two values count as equal when they
   lie at most one second apart.  Needed because the mtime kept in the
   archive is less precise than the one stored in a .pyc (dostime only
   stores even seconds). */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
900
901/* Given the contents of a .py[co] file in a buffer, unmarshal the data
902 and return the code object. Return None if it the magic word doesn't
903 match (we do this instead of raising an exception as we fall back
904 to .py if available and we don't want to mask other errors).
905 Returns a new reference. */
906static PyObject *
907unmarshal_code(char *pathname, PyObject *data, time_t mtime)
908{
909 PyObject *code;
910 char *buf = PyString_AsString(data);
911 int size = PyString_Size(data);
912
913 if (size <= 9) {
914 PyErr_SetString(ZipImportError,
915 "bad pyc data");
916 return NULL;
917 }
918
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000919 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000920 if (Py_VerboseFlag)
921 PySys_WriteStderr("# %s has bad magic\n",
922 pathname);
923 Py_INCREF(Py_None);
924 return Py_None; /* signal caller to try alternative */
925 }
926
Just van Rossum9a3129c2003-01-03 11:18:56 +0000927 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
928 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000929 if (Py_VerboseFlag)
930 PySys_WriteStderr("# %s has bad mtime\n",
931 pathname);
932 Py_INCREF(Py_None);
933 return Py_None; /* signal caller to try alternative */
934 }
935
936 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
937 if (code == NULL)
938 return NULL;
939 if (!PyCode_Check(code)) {
940 Py_DECREF(code);
941 PyErr_Format(PyExc_TypeError,
942 "compiled module %.200s is not a code object",
943 pathname);
944 return NULL;
945 }
946 return code;
947}
948
949/* Replace any occurances of "\r\n?" in the input string with "\n".
950 This converts DOS and Mac line endings to Unix line endings.
951 Also append a trailing "\n" to be compatible with
952 PyParser_SimpleParseFile(). Returns a new reference. */
953static PyObject *
954normalize_line_endings(PyObject *source)
955{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000956 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000957 PyObject *fixed_source;
958
Just van Rossum9a3129c2003-01-03 11:18:56 +0000959 /* one char extra for trailing \n and one for terminating \0 */
960 buf = PyMem_Malloc(PyString_Size(source) + 2);
961 if (buf == NULL) {
962 PyErr_SetString(PyExc_MemoryError,
963 "zipimport: no memory to allocate "
964 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000966 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000968 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 if (*p == '\r') {
970 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 }
974 else
975 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000976 }
977 *q++ = '\n'; /* add trailing \n */
978 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000979 fixed_source = PyString_FromString(buf);
980 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000981 return fixed_source;
982}
983
984/* Given a string buffer containing Python source code, compile it
985 return and return a code object as a new reference. */
986static PyObject *
987compile_source(char *pathname, PyObject *source)
988{
989 PyObject *code, *fixed_source;
990
991 fixed_source = normalize_line_endings(source);
992 if (fixed_source == NULL)
993 return NULL;
994
995 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
996 Py_file_input);
997 Py_DECREF(fixed_source);
998 return code;
999}
1000
/* Convert the DOS-format time and date words found in the Zip archive
   to a time_t, via mktime(), for comparison with the time stamp
   stored in .pyc files. */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* date word: bits 9-15 years since 1980, 5-8 month (1-based),
       0-4 day of month */
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    stm.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_mday = dosdate & 0x1f;

    /* time word: bits 11-15 hours, 5-10 minutes, 0-4 seconds / 2 */
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_min  = (dostime >> 5) & 0x3f;
    stm.tm_sec  = (dostime & 0x1f) * 2;

    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1018
1019/* Given a path to a .pyc or .pyo file in the archive, return the
1020 modifictaion time of the matching .py file, or 0 if no source
1021 is available. */
1022static time_t
1023get_mtime_of_source(ZipImporter *self, char *path)
1024{
1025 PyObject *toc_entry;
1026 time_t mtime = 0;
1027 int lastchar = strlen(path) - 1;
1028 char savechar = path[lastchar];
1029 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1030 toc_entry = PyDict_GetItemString(self->files, path);
1031 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1032 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001033 /* fetch the time stamp of the .py file for comparison
1034 with an embedded pyc time stamp */
1035 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001036 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1037 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1038 mtime = parse_dostime(time, date);
1039 }
1040 path[lastchar] = savechar;
1041 return mtime;
1042}
1043
1044/* Return the code object for the module named by 'fullname' from the
1045 Zip archive as a new reference. */
1046static PyObject *
1047get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1048 time_t mtime, PyObject *toc_entry)
1049{
1050 PyObject *data, *code;
1051 char *modpath;
1052 char *archive = PyString_AsString(self->archive);
1053
1054 if (archive == NULL)
1055 return NULL;
1056
1057 data = get_data(archive, toc_entry);
1058 if (data == NULL)
1059 return NULL;
1060
1061 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1062
1063 if (isbytecode) {
1064 code = unmarshal_code(modpath, data, mtime);
1065 }
1066 else {
1067 code = compile_source(modpath, data);
1068 }
1069 Py_DECREF(data);
1070 return code;
1071}
1072
1073/* Get the code object assoiciated with the module specified by
1074 'fullname'. */
1075static PyObject *
1076get_module_code(ZipImporter *self, char *fullname,
1077 int *p_ispackage, char **p_modpath)
1078{
1079 PyObject *toc_entry;
1080 char *subname, path[MAXPATHLEN + 1];
1081 int len;
1082 struct st_zip_searchorder *zso;
1083
1084 subname = get_subname(fullname);
1085
1086 len = make_filename(PyString_AsString(self->prefix), subname, path);
1087 if (len < 0)
1088 return NULL;
1089
1090 for (zso = zip_searchorder; *zso->suffix; zso++) {
1091 PyObject *code = NULL;
1092
1093 strcpy(path + len, zso->suffix);
1094 if (Py_VerboseFlag > 1)
1095 PySys_WriteStderr("# trying %s%c%s\n",
1096 PyString_AsString(self->archive),
1097 SEP, path);
1098 toc_entry = PyDict_GetItemString(self->files, path);
1099 if (toc_entry != NULL) {
1100 time_t mtime = 0;
1101 int ispackage = zso->type & IS_PACKAGE;
1102 int isbytecode = zso->type & IS_BYTECODE;
1103
1104 if (isbytecode)
1105 mtime = get_mtime_of_source(self, path);
1106 if (p_ispackage != NULL)
1107 *p_ispackage = ispackage;
1108 code = get_code_from_data(self, ispackage,
1109 isbytecode, mtime,
1110 toc_entry);
1111 if (code == Py_None) {
1112 /* bad magic number or non-matching mtime
1113 in byte code, try next */
1114 Py_DECREF(code);
1115 continue;
1116 }
1117 if (code != NULL && p_modpath != NULL)
1118 *p_modpath = PyString_AsString(
1119 PyTuple_GetItem(toc_entry, 0));
1120 return code;
1121 }
1122 }
1123 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1124 return NULL;
1125}
1126
1127
1128/* Module init */
1129
/* Module docstring; initzipimport() passes it to Py_InitModule4(). */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
 subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
 info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1143
1144PyMODINIT_FUNC
1145initzipimport(void)
1146{
1147 PyObject *mod;
1148
1149 if (PyType_Ready(&ZipImporter_Type) < 0)
1150 return;
1151
1152 /* Correct directory separator */
1153 zip_searchorder[0].suffix[0] = SEP;
1154 zip_searchorder[1].suffix[0] = SEP;
1155 zip_searchorder[2].suffix[0] = SEP;
1156 if (Py_OptimizeFlag) {
1157 /* Reverse *.pyc and *.pyo */
1158 struct st_zip_searchorder tmp;
1159 tmp = zip_searchorder[0];
1160 zip_searchorder[0] = zip_searchorder[1];
1161 zip_searchorder[1] = tmp;
1162 tmp = zip_searchorder[3];
1163 zip_searchorder[3] = zip_searchorder[4];
1164 zip_searchorder[4] = tmp;
1165 }
1166
1167 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1168 NULL, PYTHON_API_VERSION);
1169
1170 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1171 PyExc_ImportError, NULL);
1172 if (ZipImportError == NULL)
1173 return;
1174
1175 Py_INCREF(ZipImportError);
1176 if (PyModule_AddObject(mod, "ZipImportError",
1177 ZipImportError) < 0)
1178 return;
1179
1180 Py_INCREF(&ZipImporter_Type);
1181 if (PyModule_AddObject(mod, "zipimporter",
1182 (PyObject *)&ZipImporter_Type) < 0)
1183 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001184
Just van Rossum52e14d62002-12-30 22:08:05 +00001185 zip_directory_cache = PyDict_New();
1186 if (zip_directory_cache == NULL)
1187 return;
1188 Py_INCREF(zip_directory_cache);
1189 if (PyModule_AddObject(mod, "_zip_directory_cache",
1190 zip_directory_cache) < 0)
1191 return;
1192}