blob: 373255d5b81ed92cbddb0b0f2ab9c4f7b5eb0bc1 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type; combined with '|'. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2

/* One entry of the module search table: a file name suffix plus the
   IS_* flags describing what kind of archive entry that suffix names. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix used is "/__init__.pyc" + NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table ends with an entry whose suffix is the empty string;
   get_module_info() stops iterating when it reaches it. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
31
32/* zipimporter object definition and support */
33
typedef struct _zipimporter ZipImporter;

/* Instance layout of a zipimporter object.  All three members are
   filled in by zipimporter_init() and released by
   zipimporter_dealloc(). */
struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
42
static PyTypeObject ZipImporter_Type;
/* Exception object used with PyErr_SetString()/PyErr_Format() for
   zipimport-specific failures. */
static PyObject *ZipImportError;
/* Dict keyed by archive path holding the files dict built by
   read_directory(); consulted and filled in by zipimporter_init(). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if 'op' is an instance of ZipImporter_Type or of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
55
56
57/* zipimporter.__init__
58 Split the "subdirectory" from the Zip archive path, lookup a matching
59 entry in sys.path_importer_cache, fetch the file directory from there
60 if found, or else read it from the archive. */
61static int
62zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
63{
64 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000065 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000066
Georg Brandl02c42872005-08-26 06:42:30 +000067 if (!_PyArg_NoKeywords("zipimporter()", kwds))
68 return -1;
69
Just van Rossum52e14d62002-12-30 22:08:05 +000070 if (!PyArg_ParseTuple(args, "s:zipimporter",
71 &path))
72 return -1;
73
74 len = strlen(path);
75 if (len == 0) {
76 PyErr_SetString(ZipImportError, "archive path is empty");
77 return -1;
78 }
79 if (len >= MAXPATHLEN) {
80 PyErr_SetString(ZipImportError,
81 "archive path too long");
82 return -1;
83 }
84 strcpy(buf, path);
85
86#ifdef ALTSEP
87 for (p = buf; *p; p++) {
88 if (*p == ALTSEP)
89 *p = SEP;
90 }
91#endif
92
93 path = NULL;
94 prefix = NULL;
95 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000096#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000097 struct stat statbuf;
98 int rv;
99
100 rv = stat(buf, &statbuf);
101 if (rv == 0) {
102 /* it exists */
103 if (S_ISREG(statbuf.st_mode))
104 /* it's a file */
105 path = buf;
106 break;
107 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000108#else
109 if (object_exists(buf)) {
110 /* it exists */
111 if (isfile(buf))
112 /* it's a file */
113 path = buf;
114 break;
115 }
116#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000117 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000118 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000119 if (prefix != NULL)
120 *prefix = SEP;
121 if (p == NULL)
122 break;
123 *p = '\0';
124 prefix = p;
125 }
126 if (path != NULL) {
127 PyObject *files;
128 files = PyDict_GetItemString(zip_directory_cache, path);
129 if (files == NULL) {
130 files = read_directory(buf);
131 if (files == NULL)
132 return -1;
133 if (PyDict_SetItemString(zip_directory_cache, path,
134 files) != 0)
135 return -1;
136 }
137 else
138 Py_INCREF(files);
139 self->files = files;
140 }
141 else {
142 PyErr_SetString(ZipImportError, "not a Zip file");
143 return -1;
144 }
145
146 if (prefix == NULL)
147 prefix = "";
148 else {
149 prefix++;
150 len = strlen(prefix);
151 if (prefix[len-1] != SEP) {
152 /* add trailing SEP */
153 prefix[len] = SEP;
154 prefix[len + 1] = '\0';
155 }
156 }
157
158 self->archive = PyString_FromString(buf);
159 if (self->archive == NULL)
160 return -1;
161
162 self->prefix = PyString_FromString(prefix);
163 if (self->prefix == NULL)
164 return -1;
165
166 return 0;
167}
168
169/* GC support. */
170static int
171zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
172{
173 ZipImporter *self = (ZipImporter *)obj;
174 int err;
175
176 if (self->files != NULL) {
177 err = visit(self->files, arg);
178 if (err)
179 return err;
180 }
181 return 0;
182}
183
184static void
185zipimporter_dealloc(ZipImporter *self)
186{
187 PyObject_GC_UnTrack(self);
188 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000189 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 Py_XDECREF(self->files);
191 self->ob_type->tp_free((PyObject *)self);
192}
193
194static PyObject *
195zipimporter_repr(ZipImporter *self)
196{
197 char buf[500];
198 char *archive = "???";
199 char *prefix = "";
200
201 if (self->archive != NULL && PyString_Check(self->archive))
202 archive = PyString_AsString(self->archive);
203 if (self->prefix != NULL && PyString_Check(self->prefix))
204 prefix = PyString_AsString(self->prefix);
205 if (prefix != NULL && *prefix)
206 PyOS_snprintf(buf, sizeof(buf),
207 "<zipimporter object \"%.300s%c%.150s\">",
208 archive, SEP, prefix);
209 else
210 PyOS_snprintf(buf, sizeof(buf),
211 "<zipimporter object \"%.300s\">",
212 archive);
213 return PyString_FromString(buf);
214}
215
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname' itself;
   when there is no '.', 'fullname' is returned unchanged. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
227
228/* Given a (sub)modulename, write the potential file path in the
229 archive (without extension) to the path buffer. Return the
230 length of the resulting string. */
231static int
232make_filename(char *prefix, char *name, char *path)
233{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000234 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000235 char *p;
236
237 len = strlen(prefix);
238
239 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
240 if (len + strlen(name) + 13 >= MAXPATHLEN) {
241 PyErr_SetString(ZipImportError, "path too long");
242 return -1;
243 }
244
245 strcpy(path, prefix);
246 strcpy(path + len, name);
247 for (p = path + len; *p; p++) {
248 if (*p == '.')
249 *p = SEP;
250 }
251 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000252 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000253 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000254}
255
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* building the file name failed; exception is set */
    MI_NOT_FOUND,   /* no matching entry in the files dict */
    MI_MODULE,      /* matched a non-package entry */
    MI_PACKAGE      /* matched an entry flagged IS_PACKAGE */
};
262
263/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000264static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000265get_module_info(ZipImporter *self, char *fullname)
266{
267 char *subname, path[MAXPATHLEN + 1];
268 int len;
269 struct st_zip_searchorder *zso;
270
271 subname = get_subname(fullname);
272
273 len = make_filename(PyString_AsString(self->prefix), subname, path);
274 if (len < 0)
275 return MI_ERROR;
276
277 for (zso = zip_searchorder; *zso->suffix; zso++) {
278 strcpy(path + len, zso->suffix);
279 if (PyDict_GetItemString(self->files, path) != NULL) {
280 if (zso->type & IS_PACKAGE)
281 return MI_PACKAGE;
282 else
283 return MI_MODULE;
284 }
285 }
286 return MI_NOT_FOUND;
287}
288
289/* Check whether we can satisfy the import of the module named by
290 'fullname'. Return self if we can, None if we can't. */
291static PyObject *
292zipimporter_find_module(PyObject *obj, PyObject *args)
293{
294 ZipImporter *self = (ZipImporter *)obj;
295 PyObject *path = NULL;
296 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000297 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000298
299 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
300 &fullname, &path))
301 return NULL;
302
303 mi = get_module_info(self, fullname);
304 if (mi == MI_ERROR)
305 return NULL;
306 if (mi == MI_NOT_FOUND) {
307 Py_INCREF(Py_None);
308 return Py_None;
309 }
310 Py_INCREF(self);
311 return (PyObject *)self;
312}
313
314/* Load and return the module named by 'fullname'. */
315static PyObject *
316zipimporter_load_module(PyObject *obj, PyObject *args)
317{
318 ZipImporter *self = (ZipImporter *)obj;
319 PyObject *code, *mod, *dict;
320 char *fullname, *modpath;
321 int ispackage;
322
323 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
324 &fullname))
325 return NULL;
326
327 code = get_module_code(self, fullname, &ispackage, &modpath);
328 if (code == NULL)
329 return NULL;
330
331 mod = PyImport_AddModule(fullname);
332 if (mod == NULL) {
333 Py_DECREF(code);
334 return NULL;
335 }
336 dict = PyModule_GetDict(mod);
337
338 /* mod.__loader__ = self */
339 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
340 goto error;
341
342 if (ispackage) {
343 /* add __path__ to the module *before* the code gets
344 executed */
345 PyObject *pkgpath, *fullpath;
346 char *prefix = PyString_AsString(self->prefix);
347 char *subname = get_subname(fullname);
348 int err;
349
350 fullpath = PyString_FromFormat("%s%c%s%s",
351 PyString_AsString(self->archive),
352 SEP,
353 *prefix ? prefix : "",
354 subname);
355 if (fullpath == NULL)
356 goto error;
357
358 pkgpath = Py_BuildValue("[O]", fullpath);
359 Py_DECREF(fullpath);
360 if (pkgpath == NULL)
361 goto error;
362 err = PyDict_SetItemString(dict, "__path__", pkgpath);
363 Py_DECREF(pkgpath);
364 if (err != 0)
365 goto error;
366 }
367 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
368 Py_DECREF(code);
369 if (Py_VerboseFlag)
370 PySys_WriteStderr("import %s # loaded from Zip %s\n",
371 fullname, modpath);
372 return mod;
373error:
374 Py_DECREF(code);
375 Py_DECREF(mod);
376 return NULL;
377}
378
379/* Return a bool signifying whether the module is a package or not. */
380static PyObject *
381zipimporter_is_package(PyObject *obj, PyObject *args)
382{
383 ZipImporter *self = (ZipImporter *)obj;
384 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000385 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000386
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000387 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000388 &fullname))
389 return NULL;
390
391 mi = get_module_info(self, fullname);
392 if (mi == MI_ERROR)
393 return NULL;
394 if (mi == MI_NOT_FOUND) {
395 PyErr_Format(ZipImportError, "can't find module '%.200s'",
396 fullname);
397 return NULL;
398 }
399 return PyBool_FromLong(mi == MI_PACKAGE);
400}
401
402static PyObject *
403zipimporter_get_data(PyObject *obj, PyObject *args)
404{
405 ZipImporter *self = (ZipImporter *)obj;
406 char *path;
407#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000408 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000409#endif
410 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000411 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000412
413 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
414 return NULL;
415
416#ifdef ALTSEP
417 if (strlen(path) >= MAXPATHLEN) {
418 PyErr_SetString(ZipImportError, "path too long");
419 return NULL;
420 }
421 strcpy(buf, path);
422 for (p = buf; *p; p++) {
423 if (*p == ALTSEP)
424 *p = SEP;
425 }
426 path = buf;
427#endif
428 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000429 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000430 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
431 path[len] == SEP) {
432 path = path + len + 1;
433 }
434
435 toc_entry = PyDict_GetItemString(self->files, path);
436 if (toc_entry == NULL) {
437 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
438 path);
439 return NULL;
440 }
441 return get_data(PyString_AsString(self->archive), toc_entry);
442}
443
444static PyObject *
445zipimporter_get_code(PyObject *obj, PyObject *args)
446{
447 ZipImporter *self = (ZipImporter *)obj;
448 char *fullname;
449
450 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
451 return NULL;
452
453 return get_module_code(self, fullname, NULL, NULL);
454}
455
456static PyObject *
457zipimporter_get_source(PyObject *obj, PyObject *args)
458{
459 ZipImporter *self = (ZipImporter *)obj;
460 PyObject *toc_entry;
461 char *fullname, *subname, path[MAXPATHLEN+1];
462 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000463 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000464
465 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
466 return NULL;
467
468 mi = get_module_info(self, fullname);
469 if (mi == MI_ERROR)
470 return NULL;
471 if (mi == MI_NOT_FOUND) {
472 PyErr_Format(ZipImportError, "can't find module '%.200s'",
473 fullname);
474 return NULL;
475 }
476 subname = get_subname(fullname);
477
478 len = make_filename(PyString_AsString(self->prefix), subname, path);
479 if (len < 0)
480 return NULL;
481
482 if (mi == MI_PACKAGE) {
483 path[len] = SEP;
484 strcpy(path + len + 1, "__init__.py");
485 }
486 else
487 strcpy(path + len, ".py");
488
489 toc_entry = PyDict_GetItemString(self->files, path);
490 if (toc_entry != NULL)
491 return get_data(PyString_AsString(self->archive), toc_entry);
492
493 /* we have the module, but no source */
494 Py_INCREF(Py_None);
495 return Py_None;
496}
497
498PyDoc_STRVAR(doc_find_module,
499"find_module(fullname, path=None) -> self or None.\n\
500\n\
501Search for a module specified by 'fullname'. 'fullname' must be the\n\
502fully qualified (dotted) module name. It returns the zipimporter\n\
503instance itself if the module was found, or None if it wasn't.\n\
504The optional 'path' argument is ignored -- it's there for compatibility\n\
505with the importer protocol.");
506
507PyDoc_STRVAR(doc_load_module,
508"load_module(fullname) -> module.\n\
509\n\
510Load the module specified by 'fullname'. 'fullname' must be the\n\
511fully qualified (dotted) module name. It returns the imported\n\
512module, or raises ZipImportError if it wasn't found.");
513
514PyDoc_STRVAR(doc_get_data,
515"get_data(pathname) -> string with file data.\n\
516\n\
517Return the data associated with 'pathname'. Raise IOError if\n\
518the file wasn't found.");
519
520PyDoc_STRVAR(doc_is_package,
521"is_package(fullname) -> bool.\n\
522\n\
523Return True if the module specified by fullname is a package.\n\
524Raise ZipImportError is the module couldn't be found.");
525
526PyDoc_STRVAR(doc_get_code,
527"get_code(fullname) -> code object.\n\
528\n\
529Return the code object for the specified module. Raise ZipImportError\n\
530is the module couldn't be found.");
531
532PyDoc_STRVAR(doc_get_source,
533"get_source(fullname) -> source string.\n\
534\n\
535Return the source code for the specified module. Raise ZipImportError\n\
536is the module couldn't be found, return None if the archive does\n\
537contain the module, but has no source for it.");
538
/* Method table of the zipimporter type: Python-level name, C
   implementation, calling convention and docstring.  Every method
   takes its arguments as a tuple (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL, NULL} /* sentinel */
};
554
/* Read-only attributes exposing the three object members.  Note that
   the files dict is published under the name "_files". */
static PyMemberDef zipimporter_members[] = {
    {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
    {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
    {NULL}  /* sentinel */
};
561
/* Docstring of the zipimporter type itself. */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");

/* Expands to 0 regardless of its argument, so the type pointer in the
   static initializer below starts out as NULL.
   NOTE(review): presumably the real &PyType_Type is filled in by the
   module init function, which is not visible here -- confirm. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimporter: a GC-enabled, subclassable type with
   generic attribute lookup, generic allocation/creation and the
   init, dealloc, repr and traverse functions defined above. */
static PyTypeObject ZipImporter_Type = {
    PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
    0,
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                  /* tp_itemsize */
    (destructor)zipimporter_dealloc,    /* tp_dealloc */
    0,                                  /* tp_print */
    0,                                  /* tp_getattr */
    0,                                  /* tp_setattr */
    0,                                  /* tp_compare */
    (reprfunc)zipimporter_repr,         /* tp_repr */
    0,                                  /* tp_as_number */
    0,                                  /* tp_as_sequence */
    0,                                  /* tp_as_mapping */
    0,                                  /* tp_hash */
    0,                                  /* tp_call */
    0,                                  /* tp_str */
    PyObject_GenericGetAttr,            /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,             /* tp_flags */
    zipimporter_doc,                    /* tp_doc */
    zipimporter_traverse,               /* tp_traverse */
    0,                                  /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    zipimporter_methods,                /* tp_methods */
    zipimporter_members,                /* tp_members */
    0,                                  /* tp_getset */
    0,                                  /* tp_base */
    0,                                  /* tp_dict */
    0,                                  /* tp_descr_get */
    0,                                  /* tp_descr_set */
    0,                                  /* tp_dictoffset */
    (initproc)zipimporter_init,         /* tp_init */
    PyType_GenericAlloc,                /* tp_alloc */
    PyType_GenericNew,                  /* tp_new */
    PyObject_GC_Del,                    /* tp_free */
};
614
615
616/* implementation */
617
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian, interpreted as a signed 32-bit
   value (so 0xFFFFFFFF yields -1 whatever the width of long). This
   partially reimplements marshal.c:r_long().

   The bytes are assembled in an unsigned long so that no shift ever
   reaches the sign bit, and the sign is applied arithmetically instead
   of depending on the platform's long size. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* two's complement value of a negative 32-bit number:
           -(2**32 - bits), computed without overflowing long */
        return -(long)(0xFFFFFFFFUL - bits) - 1;
    }
    return (long)bits;
}
634
635/*
636 read_directory(archive) -> files dict (new reference)
637
638 Given a path to a Zip archive, build a dict, mapping file names
639 (local to the archive, using SEP as a separator) to toc entries.
640
641 A toc_entry is a tuple:
642
Fred Drakef5b7fd22005-11-11 19:34:56 +0000643 (__file__, # value to use for __file__, available for all files
644 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000645 data_size, # size of compressed data on disk
646 file_size, # size of decompressed data
647 file_offset, # offset of file header from start of archive
648 time, # mod time of file (in dos format)
649 date, # mod data of file (in dos format)
650 crc, # crc checksum of the data
651 )
652
653 Directories can be recognized by the trailing SEP in the name,
654 data_size and file_offset are 0.
655*/
656static PyObject *
657read_directory(char *archive)
658{
659 PyObject *files = NULL;
660 FILE *fp;
661 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000662 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000663 long i, l, count;
664 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000665 char path[MAXPATHLEN + 5];
666 char name[MAXPATHLEN + 5];
667 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000668 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000669
670 if (strlen(archive) > MAXPATHLEN) {
671 PyErr_SetString(PyExc_OverflowError,
672 "Zip path name is too long");
673 return NULL;
674 }
675 strcpy(path, archive);
676
677 fp = fopen(archive, "rb");
678 if (fp == NULL) {
679 PyErr_Format(ZipImportError, "can't open Zip file: "
680 "'%.200s'", archive);
681 return NULL;
682 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000683 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000684 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000685 if (fread(endof_central_dir, 1, 22, fp) != 22) {
686 fclose(fp);
687 PyErr_Format(ZipImportError, "can't read Zip file: "
688 "'%.200s'", archive);
689 return NULL;
690 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000691 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000692 /* Bad: End of Central Dir signature */
693 fclose(fp);
694 PyErr_Format(ZipImportError, "not a Zip file: "
695 "'%.200s'", archive);
696 return NULL;
697 }
698
Thomas Heller354e3d92003-07-22 18:10:15 +0000699 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000700 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000701 arc_offset = header_position - header_offset - header_size;
702 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000703
704 files = PyDict_New();
705 if (files == NULL)
706 goto error;
707
708 length = (long)strlen(path);
709 path[length] = SEP;
710
711 /* Start of Central Directory */
712 count = 0;
713 for (;;) {
714 PyObject *t;
715 int err;
716
717 fseek(fp, header_offset, 0); /* Start of file header */
718 l = PyMarshal_ReadLongFromFile(fp);
719 if (l != 0x02014B50)
720 break; /* Bad: Central Dir File Header */
721 fseek(fp, header_offset + 10, 0);
722 compress = PyMarshal_ReadShortFromFile(fp);
723 time = PyMarshal_ReadShortFromFile(fp);
724 date = PyMarshal_ReadShortFromFile(fp);
725 crc = PyMarshal_ReadLongFromFile(fp);
726 data_size = PyMarshal_ReadLongFromFile(fp);
727 file_size = PyMarshal_ReadLongFromFile(fp);
728 name_size = PyMarshal_ReadShortFromFile(fp);
729 header_size = 46 + name_size +
730 PyMarshal_ReadShortFromFile(fp) +
731 PyMarshal_ReadShortFromFile(fp);
732 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000733 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000734 if (name_size > MAXPATHLEN)
735 name_size = MAXPATHLEN;
736
737 p = name;
738 for (i = 0; i < name_size; i++) {
739 *p = (char)getc(fp);
740 if (*p == '/')
741 *p = SEP;
742 p++;
743 }
744 *p = 0; /* Add terminating null byte */
745 header_offset += header_size;
746
747 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
748
749 t = Py_BuildValue("siiiiiii", path, compress, data_size,
750 file_size, file_offset, time, date, crc);
751 if (t == NULL)
752 goto error;
753 err = PyDict_SetItemString(files, name, t);
754 Py_DECREF(t);
755 if (err != 0)
756 goto error;
757 count++;
758 }
759 fclose(fp);
760 if (Py_VerboseFlag)
761 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
762 count, archive);
763 return files;
764error:
765 fclose(fp);
766 Py_XDECREF(files);
767 return NULL;
768}
769
770/* Return the zlib.decompress function object, or NULL if zlib couldn't
771 be imported. The function is cached when found, so subsequent calls
772 don't import zlib again. Returns a *borrowed* reference.
773 XXX This makes zlib.decompress immortal. */
774static PyObject *
775get_decompress_func(void)
776{
777 static PyObject *decompress = NULL;
778
779 if (decompress == NULL) {
780 PyObject *zlib;
781 static int importing_zlib = 0;
782
783 if (importing_zlib != 0)
784 /* Someone has a zlib.py[co] in their Zip file;
785 let's avoid a stack overflow. */
786 return NULL;
787 importing_zlib = 1;
788 zlib = PyImport_ImportModule("zlib"); /* import zlib */
789 importing_zlib = 0;
790 if (zlib != NULL) {
791 decompress = PyObject_GetAttrString(zlib,
792 "decompress");
793 Py_DECREF(zlib);
794 }
795 else
796 PyErr_Clear();
797 if (Py_VerboseFlag)
798 PySys_WriteStderr("# zipimport: zlib %s\n",
799 zlib != NULL ? "available": "UNAVAILABLE");
800 }
801 return decompress;
802}
803
804/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
805 data as a new reference. */
806static PyObject *
807get_data(char *archive, PyObject *toc_entry)
808{
809 PyObject *raw_data, *data = NULL, *decompress;
810 char *buf;
811 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000812 int err;
813 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000814 long l;
815 char *datapath;
816 long compress, data_size, file_size, file_offset;
817 long time, date, crc;
818
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000819 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000820 &data_size, &file_size, &file_offset, &time,
821 &date, &crc)) {
822 return NULL;
823 }
824
825 fp = fopen(archive, "rb");
826 if (!fp) {
827 PyErr_Format(PyExc_IOError,
828 "zipimport: can not open file %s", archive);
829 return NULL;
830 }
831
832 /* Check to make sure the local file header is correct */
833 fseek(fp, file_offset, 0);
834 l = PyMarshal_ReadLongFromFile(fp);
835 if (l != 0x04034B50) {
836 /* Bad: Local File Header */
837 PyErr_Format(ZipImportError,
838 "bad local file header in %s",
839 archive);
840 fclose(fp);
841 return NULL;
842 }
843 fseek(fp, file_offset + 26, 0);
844 l = 30 + PyMarshal_ReadShortFromFile(fp) +
845 PyMarshal_ReadShortFromFile(fp); /* local header size */
846 file_offset += l; /* Start of file data */
847
848 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
849 data_size : data_size + 1);
850 if (raw_data == NULL) {
851 fclose(fp);
852 return NULL;
853 }
854 buf = PyString_AsString(raw_data);
855
856 err = fseek(fp, file_offset, 0);
857 if (err == 0)
858 bytes_read = fread(buf, 1, data_size, fp);
859 fclose(fp);
860 if (err || bytes_read != data_size) {
861 PyErr_SetString(PyExc_IOError,
862 "zipimport: can't read data");
863 Py_DECREF(raw_data);
864 return NULL;
865 }
866
867 if (compress != 0) {
868 buf[data_size] = 'Z'; /* saw this in zipfile.py */
869 data_size++;
870 }
871 buf[data_size] = '\0';
872
873 if (compress == 0) /* data is not compressed */
874 return raw_data;
875
876 /* Decompress with zlib */
877 decompress = get_decompress_func();
878 if (decompress == NULL) {
879 PyErr_SetString(ZipImportError,
880 "can't decompress data; "
881 "zlib not available");
882 goto error;
883 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000884 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000885error:
886 Py_DECREF(raw_data);
887 return data;
888}
889
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.
   Returns 1 if the two time stamps differ by at most one second,
   0 otherwise.  The absolute difference is computed by subtracting
   the smaller value from the larger one, so the result does not
   depend on time_t being a signed type. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t d = (t1 > t2) ? t1 - t2 : t2 - t1;

    /* dostime only stores even seconds, so be lenient */
    return d <= 1;
}
902
903/* Given the contents of a .py[co] file in a buffer, unmarshal the data
904 and return the code object. Return None if it the magic word doesn't
905 match (we do this instead of raising an exception as we fall back
906 to .py if available and we don't want to mask other errors).
907 Returns a new reference. */
908static PyObject *
909unmarshal_code(char *pathname, PyObject *data, time_t mtime)
910{
911 PyObject *code;
912 char *buf = PyString_AsString(data);
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000913 Py_ssize_t size = PyString_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000914
915 if (size <= 9) {
916 PyErr_SetString(ZipImportError,
917 "bad pyc data");
918 return NULL;
919 }
920
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000921 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000922 if (Py_VerboseFlag)
923 PySys_WriteStderr("# %s has bad magic\n",
924 pathname);
925 Py_INCREF(Py_None);
926 return Py_None; /* signal caller to try alternative */
927 }
928
Just van Rossum9a3129c2003-01-03 11:18:56 +0000929 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
930 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000931 if (Py_VerboseFlag)
932 PySys_WriteStderr("# %s has bad mtime\n",
933 pathname);
934 Py_INCREF(Py_None);
935 return Py_None; /* signal caller to try alternative */
936 }
937
938 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
939 if (code == NULL)
940 return NULL;
941 if (!PyCode_Check(code)) {
942 Py_DECREF(code);
943 PyErr_Format(PyExc_TypeError,
944 "compiled module %.200s is not a code object",
945 pathname);
946 return NULL;
947 }
948 return code;
949}
950
951/* Replace any occurances of "\r\n?" in the input string with "\n".
952 This converts DOS and Mac line endings to Unix line endings.
953 Also append a trailing "\n" to be compatible with
954 PyParser_SimpleParseFile(). Returns a new reference. */
955static PyObject *
956normalize_line_endings(PyObject *source)
957{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000959 PyObject *fixed_source;
960
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 /* one char extra for trailing \n and one for terminating \0 */
962 buf = PyMem_Malloc(PyString_Size(source) + 2);
963 if (buf == NULL) {
964 PyErr_SetString(PyExc_MemoryError,
965 "zipimport: no memory to allocate "
966 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000968 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000970 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000971 if (*p == '\r') {
972 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000973 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000974 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000975 }
976 else
977 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000978 }
979 *q++ = '\n'; /* add trailing \n */
980 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000981 fixed_source = PyString_FromString(buf);
982 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000983 return fixed_source;
984}
985
986/* Given a string buffer containing Python source code, compile it
987 return and return a code object as a new reference. */
988static PyObject *
989compile_source(char *pathname, PyObject *source)
990{
991 PyObject *code, *fixed_source;
992
993 fixed_source = normalize_line_endings(source);
994 if (fixed_source == NULL)
995 return NULL;
996
997 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
998 Py_file_input);
999 Py_DECREF(fixed_source);
1000 return code;
1001}
1002
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' holds seconds/2 in bits 0-4, minutes in bits 5-10 and
   hours in bits 11-15; 'dosdate' holds the day in bits 0-4, the month
   in bits 5-8 and the year minus 1980 in bits 9-15.  The value is
   interpreted as local time and converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* Start from an all-zero struct so that no field, including any
       platform-specific extra member, reaches mktime() uninitialized. */
    memset((void *)&stm, '\0', sizeof(stm));

    stm.tm_sec   =  (dostime        & 0x1f) * 2;
    stm.tm_min   =  (dostime >> 5)  & 0x3f;
    stm.tm_hour  =  (dostime >> 11) & 0x1f;
    stm.tm_mday  =   dosdate        & 0x1f;
    stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    stm.tm_isdst =   -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1020
1021/* Given a path to a .pyc or .pyo file in the archive, return the
1022 modifictaion time of the matching .py file, or 0 if no source
1023 is available. */
1024static time_t
1025get_mtime_of_source(ZipImporter *self, char *path)
1026{
1027 PyObject *toc_entry;
1028 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001029 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001030 char savechar = path[lastchar];
1031 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1032 toc_entry = PyDict_GetItemString(self->files, path);
1033 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1034 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001035 /* fetch the time stamp of the .py file for comparison
1036 with an embedded pyc time stamp */
1037 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001038 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1039 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1040 mtime = parse_dostime(time, date);
1041 }
1042 path[lastchar] = savechar;
1043 return mtime;
1044}
1045
1046/* Return the code object for the module named by 'fullname' from the
1047 Zip archive as a new reference. */
1048static PyObject *
1049get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1050 time_t mtime, PyObject *toc_entry)
1051{
1052 PyObject *data, *code;
1053 char *modpath;
1054 char *archive = PyString_AsString(self->archive);
1055
1056 if (archive == NULL)
1057 return NULL;
1058
1059 data = get_data(archive, toc_entry);
1060 if (data == NULL)
1061 return NULL;
1062
1063 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1064
1065 if (isbytecode) {
1066 code = unmarshal_code(modpath, data, mtime);
1067 }
1068 else {
1069 code = compile_source(modpath, data);
1070 }
1071 Py_DECREF(data);
1072 return code;
1073}
1074
1075/* Get the code object assoiciated with the module specified by
1076 'fullname'. */
1077static PyObject *
1078get_module_code(ZipImporter *self, char *fullname,
1079 int *p_ispackage, char **p_modpath)
1080{
1081 PyObject *toc_entry;
1082 char *subname, path[MAXPATHLEN + 1];
1083 int len;
1084 struct st_zip_searchorder *zso;
1085
1086 subname = get_subname(fullname);
1087
1088 len = make_filename(PyString_AsString(self->prefix), subname, path);
1089 if (len < 0)
1090 return NULL;
1091
1092 for (zso = zip_searchorder; *zso->suffix; zso++) {
1093 PyObject *code = NULL;
1094
1095 strcpy(path + len, zso->suffix);
1096 if (Py_VerboseFlag > 1)
1097 PySys_WriteStderr("# trying %s%c%s\n",
1098 PyString_AsString(self->archive),
1099 SEP, path);
1100 toc_entry = PyDict_GetItemString(self->files, path);
1101 if (toc_entry != NULL) {
1102 time_t mtime = 0;
1103 int ispackage = zso->type & IS_PACKAGE;
1104 int isbytecode = zso->type & IS_BYTECODE;
1105
1106 if (isbytecode)
1107 mtime = get_mtime_of_source(self, path);
1108 if (p_ispackage != NULL)
1109 *p_ispackage = ispackage;
1110 code = get_code_from_data(self, ispackage,
1111 isbytecode, mtime,
1112 toc_entry);
1113 if (code == Py_None) {
1114 /* bad magic number or non-matching mtime
1115 in byte code, try next */
1116 Py_DECREF(code);
1117 continue;
1118 }
1119 if (code != NULL && p_modpath != NULL)
1120 *p_modpath = PyString_AsString(
1121 PyTuple_GetItem(toc_entry, 0));
1122 return code;
1123 }
1124 }
1125 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1126 return NULL;
1127}
1128
1129
1130/* Module init */
1131
1132PyDoc_STRVAR(zipimport_doc,
1133"zipimport provides support for importing Python modules from Zip archives.\n\
1134\n\
1135This module exports three objects:\n\
1136- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001137- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001138 subclass of ImportError, so it can be caught as ImportError, too.\n\
1139- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1140 info dicts, as used in zipimporter._files.\n\
1141\n\
1142It is usually not needed to use the zipimport module explicitly; it is\n\
1143used by the builtin import mechanism for sys.path items that are paths\n\
1144to Zip archives.");
1145
1146PyMODINIT_FUNC
1147initzipimport(void)
1148{
1149 PyObject *mod;
1150
1151 if (PyType_Ready(&ZipImporter_Type) < 0)
1152 return;
1153
1154 /* Correct directory separator */
1155 zip_searchorder[0].suffix[0] = SEP;
1156 zip_searchorder[1].suffix[0] = SEP;
1157 zip_searchorder[2].suffix[0] = SEP;
1158 if (Py_OptimizeFlag) {
1159 /* Reverse *.pyc and *.pyo */
1160 struct st_zip_searchorder tmp;
1161 tmp = zip_searchorder[0];
1162 zip_searchorder[0] = zip_searchorder[1];
1163 zip_searchorder[1] = tmp;
1164 tmp = zip_searchorder[3];
1165 zip_searchorder[3] = zip_searchorder[4];
1166 zip_searchorder[4] = tmp;
1167 }
1168
1169 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1170 NULL, PYTHON_API_VERSION);
1171
1172 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1173 PyExc_ImportError, NULL);
1174 if (ZipImportError == NULL)
1175 return;
1176
1177 Py_INCREF(ZipImportError);
1178 if (PyModule_AddObject(mod, "ZipImportError",
1179 ZipImportError) < 0)
1180 return;
1181
1182 Py_INCREF(&ZipImporter_Type);
1183 if (PyModule_AddObject(mod, "zipimporter",
1184 (PyObject *)&ZipImporter_Type) < 0)
1185 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001186
Just van Rossum52e14d62002-12-30 22:08:05 +00001187 zip_directory_cache = PyDict_New();
1188 if (zip_directory_cache == NULL)
1189 return;
1190 Py_INCREF(zip_directory_cache);
1191 if (PyModule_AddObject(mod, "_zip_directory_cache",
1192 zip_directory_cache) < 0)
1193 return;
1194}