blob: c8f7e34438161b13ff2be914edd02d0ac4924f81 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
5#include "compile.h"
6#include <time.h>
7
8
/* Flag bits stored in st_zip_searchorder.type.  IS_SOURCE is the absence
   of IS_BYTECODE; IS_PACKAGE may be OR-ed with either of them. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2
12
/* One candidate in the module search order: the suffix appended to the
   module's path inside the archive, plus IS_* flags describing it. */
struct st_zip_searchorder {
    char suffix[14];    /* holds "/__init__.pyc" and its terminating NUL */
    int type;           /* bitwise OR of IS_SOURCE/IS_BYTECODE/IS_PACKAGE */
};
17
18/* zip_searchorder defines how we search for a module in the Zip
19 archive: we first search for a package __init__, then for
20 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
21 are swapped by initzipimport() if we run in optimized mode. Also,
22 '/' is replaced by SEP there. */
23struct st_zip_searchorder zip_searchorder[] = {
24 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
26 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
27 {".pyc", IS_BYTECODE},
28 {".pyo", IS_BYTECODE},
29 {".py", IS_SOURCE},
30 {"", 0}
31};
32
33/* zipimporter object definition and support */
34
35typedef struct _zipimporter ZipImporter;
36
37struct _zipimporter {
38 PyObject_HEAD
39 PyObject *archive; /* pathname of the Zip archive */
40 PyObject *prefix; /* file prefix: "a/sub/directory/" */
41 PyObject *files; /* dict with file info {path: toc_entry} */
42};
43
44static PyTypeObject ZipImporter_Type;
45static PyObject *ZipImportError;
46static PyObject *zip_directory_cache = NULL;
47
48/* forward decls */
49static PyObject *read_directory(char *archive);
50static PyObject *get_data(char *archive, PyObject *toc_entry);
51static PyObject *get_module_code(ZipImporter *self, char *fullname,
52 int *p_ispackage, char **p_modpath);
53
54
55#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
56
57
58/* zipimporter.__init__
59 Split the "subdirectory" from the Zip archive path, lookup a matching
60 entry in sys.path_importer_cache, fetch the file directory from there
61 if found, or else read it from the archive. */
62static int
63zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
64{
65 char *path, *p, *prefix, buf[MAXPATHLEN+2];
66 int len;
67
68 if (!PyArg_ParseTuple(args, "s:zipimporter",
69 &path))
70 return -1;
71
72 len = strlen(path);
73 if (len == 0) {
74 PyErr_SetString(ZipImportError, "archive path is empty");
75 return -1;
76 }
77 if (len >= MAXPATHLEN) {
78 PyErr_SetString(ZipImportError,
79 "archive path too long");
80 return -1;
81 }
82 strcpy(buf, path);
83
84#ifdef ALTSEP
85 for (p = buf; *p; p++) {
86 if (*p == ALTSEP)
87 *p = SEP;
88 }
89#endif
90
91 path = NULL;
92 prefix = NULL;
93 for (;;) {
94 struct stat statbuf;
95 int rv;
96
97 rv = stat(buf, &statbuf);
98 if (rv == 0) {
99 /* it exists */
100 if (S_ISREG(statbuf.st_mode))
101 /* it's a file */
102 path = buf;
103 break;
104 }
105 /* back up one path element */
106 p = strchr(buf, SEP);
107 if (prefix != NULL)
108 *prefix = SEP;
109 if (p == NULL)
110 break;
111 *p = '\0';
112 prefix = p;
113 }
114 if (path != NULL) {
115 PyObject *files;
116 files = PyDict_GetItemString(zip_directory_cache, path);
117 if (files == NULL) {
118 files = read_directory(buf);
119 if (files == NULL)
120 return -1;
121 if (PyDict_SetItemString(zip_directory_cache, path,
122 files) != 0)
123 return -1;
124 }
125 else
126 Py_INCREF(files);
127 self->files = files;
128 }
129 else {
130 PyErr_SetString(ZipImportError, "not a Zip file");
131 return -1;
132 }
133
134 if (prefix == NULL)
135 prefix = "";
136 else {
137 prefix++;
138 len = strlen(prefix);
139 if (prefix[len-1] != SEP) {
140 /* add trailing SEP */
141 prefix[len] = SEP;
142 prefix[len + 1] = '\0';
143 }
144 }
145
146 self->archive = PyString_FromString(buf);
147 if (self->archive == NULL)
148 return -1;
149
150 self->prefix = PyString_FromString(prefix);
151 if (self->prefix == NULL)
152 return -1;
153
154 return 0;
155}
156
157/* GC support. */
158static int
159zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
160{
161 ZipImporter *self = (ZipImporter *)obj;
162 int err;
163
164 if (self->files != NULL) {
165 err = visit(self->files, arg);
166 if (err)
167 return err;
168 }
169 return 0;
170}
171
172static void
173zipimporter_dealloc(ZipImporter *self)
174{
175 PyObject_GC_UnTrack(self);
176 Py_XDECREF(self->archive);
177 Py_XDECREF(self->files);
178 self->ob_type->tp_free((PyObject *)self);
179}
180
181static PyObject *
182zipimporter_repr(ZipImporter *self)
183{
184 char buf[500];
185 char *archive = "???";
186 char *prefix = "";
187
188 if (self->archive != NULL && PyString_Check(self->archive))
189 archive = PyString_AsString(self->archive);
190 if (self->prefix != NULL && PyString_Check(self->prefix))
191 prefix = PyString_AsString(self->prefix);
192 if (prefix != NULL && *prefix)
193 PyOS_snprintf(buf, sizeof(buf),
194 "<zipimporter object \"%.300s%c%.150s\">",
195 archive, SEP, prefix);
196 else
197 PyOS_snprintf(buf, sizeof(buf),
198 "<zipimporter object \"%.300s\">",
199 archive);
200 return PyString_FromString(buf);
201}
202
/* Return the last dotted component of 'fullname', i.e. the equivalent
   of fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
214
215/* Given a (sub)modulename, write the potential file path in the
216 archive (without extension) to the path buffer. Return the
217 length of the resulting string. */
218static int
219make_filename(char *prefix, char *name, char *path)
220{
221 int len;
222 char *p;
223
224 len = strlen(prefix);
225
226 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
227 if (len + strlen(name) + 13 >= MAXPATHLEN) {
228 PyErr_SetString(ZipImportError, "path too long");
229 return -1;
230 }
231
232 strcpy(path, prefix);
233 strcpy(path + len, name);
234 for (p = path + len; *p; p++) {
235 if (*p == '.')
236 *p = SEP;
237 }
238 len += strlen(name);
239 return len;
240}
241
/* Result of looking a module up in an archive's directory. */
enum module_info {
    MI_ERROR = 0,       /* lookup failed, an exception is set */
    MI_NOT_FOUND = 1,   /* the archive has no such module */
    MI_MODULE = 2,      /* found as a plain module */
    MI_PACKAGE = 3      /* found as a package (__init__ entry) */
};
248
249/* Return some information about a module. */
250static enum module_info
251get_module_info(ZipImporter *self, char *fullname)
252{
253 char *subname, path[MAXPATHLEN + 1];
254 int len;
255 struct st_zip_searchorder *zso;
256
257 subname = get_subname(fullname);
258
259 len = make_filename(PyString_AsString(self->prefix), subname, path);
260 if (len < 0)
261 return MI_ERROR;
262
263 for (zso = zip_searchorder; *zso->suffix; zso++) {
264 strcpy(path + len, zso->suffix);
265 if (PyDict_GetItemString(self->files, path) != NULL) {
266 if (zso->type & IS_PACKAGE)
267 return MI_PACKAGE;
268 else
269 return MI_MODULE;
270 }
271 }
272 return MI_NOT_FOUND;
273}
274
275/* Check whether we can satisfy the import of the module named by
276 'fullname'. Return self if we can, None if we can't. */
277static PyObject *
278zipimporter_find_module(PyObject *obj, PyObject *args)
279{
280 ZipImporter *self = (ZipImporter *)obj;
281 PyObject *path = NULL;
282 char *fullname;
283 enum module_info mi;
284
285 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
286 &fullname, &path))
287 return NULL;
288
289 mi = get_module_info(self, fullname);
290 if (mi == MI_ERROR)
291 return NULL;
292 if (mi == MI_NOT_FOUND) {
293 Py_INCREF(Py_None);
294 return Py_None;
295 }
296 Py_INCREF(self);
297 return (PyObject *)self;
298}
299
300/* Load and return the module named by 'fullname'. */
301static PyObject *
302zipimporter_load_module(PyObject *obj, PyObject *args)
303{
304 ZipImporter *self = (ZipImporter *)obj;
305 PyObject *code, *mod, *dict;
306 char *fullname, *modpath;
307 int ispackage;
308
309 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
310 &fullname))
311 return NULL;
312
313 code = get_module_code(self, fullname, &ispackage, &modpath);
314 if (code == NULL)
315 return NULL;
316
317 mod = PyImport_AddModule(fullname);
318 if (mod == NULL) {
319 Py_DECREF(code);
320 return NULL;
321 }
322 dict = PyModule_GetDict(mod);
323
324 /* mod.__loader__ = self */
325 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
326 goto error;
327
328 if (ispackage) {
329 /* add __path__ to the module *before* the code gets
330 executed */
331 PyObject *pkgpath, *fullpath;
332 char *prefix = PyString_AsString(self->prefix);
333 char *subname = get_subname(fullname);
334 int err;
335
336 fullpath = PyString_FromFormat("%s%c%s%s",
337 PyString_AsString(self->archive),
338 SEP,
339 *prefix ? prefix : "",
340 subname);
341 if (fullpath == NULL)
342 goto error;
343
344 pkgpath = Py_BuildValue("[O]", fullpath);
345 Py_DECREF(fullpath);
346 if (pkgpath == NULL)
347 goto error;
348 err = PyDict_SetItemString(dict, "__path__", pkgpath);
349 Py_DECREF(pkgpath);
350 if (err != 0)
351 goto error;
352 }
353 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
354 Py_DECREF(code);
355 if (Py_VerboseFlag)
356 PySys_WriteStderr("import %s # loaded from Zip %s\n",
357 fullname, modpath);
358 return mod;
359error:
360 Py_DECREF(code);
361 Py_DECREF(mod);
362 return NULL;
363}
364
365/* Return a bool signifying whether the module is a package or not. */
366static PyObject *
367zipimporter_is_package(PyObject *obj, PyObject *args)
368{
369 ZipImporter *self = (ZipImporter *)obj;
370 char *fullname;
371 enum module_info mi;
372
373 if (!PyArg_ParseTuple(args, "s:zipimporter.find_module",
374 &fullname))
375 return NULL;
376
377 mi = get_module_info(self, fullname);
378 if (mi == MI_ERROR)
379 return NULL;
380 if (mi == MI_NOT_FOUND) {
381 PyErr_Format(ZipImportError, "can't find module '%.200s'",
382 fullname);
383 return NULL;
384 }
385 return PyBool_FromLong(mi == MI_PACKAGE);
386}
387
388static PyObject *
389zipimporter_get_data(PyObject *obj, PyObject *args)
390{
391 ZipImporter *self = (ZipImporter *)obj;
392 char *path;
393#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000394 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000395#endif
396 PyObject *toc_entry;
397 int len;
398
399 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
400 return NULL;
401
402#ifdef ALTSEP
403 if (strlen(path) >= MAXPATHLEN) {
404 PyErr_SetString(ZipImportError, "path too long");
405 return NULL;
406 }
407 strcpy(buf, path);
408 for (p = buf; *p; p++) {
409 if (*p == ALTSEP)
410 *p = SEP;
411 }
412 path = buf;
413#endif
414 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000415 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000416 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
417 path[len] == SEP) {
418 path = path + len + 1;
419 }
420
421 toc_entry = PyDict_GetItemString(self->files, path);
422 if (toc_entry == NULL) {
423 PyErr_Format(PyExc_IOError, "file not found [%.200s]",
424 path);
425 return NULL;
426 }
427 return get_data(PyString_AsString(self->archive), toc_entry);
428}
429
430static PyObject *
431zipimporter_get_code(PyObject *obj, PyObject *args)
432{
433 ZipImporter *self = (ZipImporter *)obj;
434 char *fullname;
435
436 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
437 return NULL;
438
439 return get_module_code(self, fullname, NULL, NULL);
440}
441
442static PyObject *
443zipimporter_get_source(PyObject *obj, PyObject *args)
444{
445 ZipImporter *self = (ZipImporter *)obj;
446 PyObject *toc_entry;
447 char *fullname, *subname, path[MAXPATHLEN+1];
448 int len;
449 enum module_info mi;
450
451 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
452 return NULL;
453
454 mi = get_module_info(self, fullname);
455 if (mi == MI_ERROR)
456 return NULL;
457 if (mi == MI_NOT_FOUND) {
458 PyErr_Format(ZipImportError, "can't find module '%.200s'",
459 fullname);
460 return NULL;
461 }
462 subname = get_subname(fullname);
463
464 len = make_filename(PyString_AsString(self->prefix), subname, path);
465 if (len < 0)
466 return NULL;
467
468 if (mi == MI_PACKAGE) {
469 path[len] = SEP;
470 strcpy(path + len + 1, "__init__.py");
471 }
472 else
473 strcpy(path + len, ".py");
474
475 toc_entry = PyDict_GetItemString(self->files, path);
476 if (toc_entry != NULL)
477 return get_data(PyString_AsString(self->archive), toc_entry);
478
479 /* we have the module, but no source */
480 Py_INCREF(Py_None);
481 return Py_None;
482}
483
484PyDoc_STRVAR(doc_find_module,
485"find_module(fullname, path=None) -> self or None.\n\
486\n\
487Search for a module specified by 'fullname'. 'fullname' must be the\n\
488fully qualified (dotted) module name. It returns the zipimporter\n\
489instance itself if the module was found, or None if it wasn't.\n\
490The optional 'path' argument is ignored -- it's there for compatibility\n\
491with the importer protocol.");
492
493PyDoc_STRVAR(doc_load_module,
494"load_module(fullname) -> module.\n\
495\n\
496Load the module specified by 'fullname'. 'fullname' must be the\n\
497fully qualified (dotted) module name. It returns the imported\n\
498module, or raises ZipImportError if it wasn't found.");
499
500PyDoc_STRVAR(doc_get_data,
501"get_data(pathname) -> string with file data.\n\
502\n\
503Return the data associated with 'pathname'. Raise IOError if\n\
504the file wasn't found.");
505
506PyDoc_STRVAR(doc_is_package,
507"is_package(fullname) -> bool.\n\
508\n\
509Return True if the module specified by fullname is a package.\n\
510Raise ZipImportError is the module couldn't be found.");
511
512PyDoc_STRVAR(doc_get_code,
513"get_code(fullname) -> code object.\n\
514\n\
515Return the code object for the specified module. Raise ZipImportError\n\
516is the module couldn't be found.");
517
518PyDoc_STRVAR(doc_get_source,
519"get_source(fullname) -> source string.\n\
520\n\
521Return the source code for the specified module. Raise ZipImportError\n\
522is the module couldn't be found, return None if the archive does\n\
523contain the module, but has no source for it.");
524
525static PyMethodDef zipimporter_methods[] = {
526 {"find_module", zipimporter_find_module, METH_VARARGS,
527 doc_find_module},
528 {"load_module", zipimporter_load_module, METH_VARARGS,
529 doc_load_module},
530 {"get_data", zipimporter_get_data, METH_VARARGS,
531 doc_get_data},
532 {"get_code", zipimporter_get_code, METH_VARARGS,
533 doc_get_code},
534 {"get_source", zipimporter_get_source, METH_VARARGS,
535 doc_get_source},
536 {"is_package", zipimporter_is_package, METH_VARARGS,
537 doc_is_package},
538 {NULL, NULL} /* sentinel */
539};
540
541static PyMemberDef zipimporter_members[] = {
542 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
543 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
544 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
545 {NULL}
546};
547
548PyDoc_STRVAR(zipimporter_doc,
549"zipimporter(archivepath) -> zipimporter object\n\
550\n\
551Create a new zipimporter instance. 'archivepath' must be a path to\n\
552a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
553a valid Zip archive.");
554
555#define DEFERRED_ADDRESS(ADDR) 0
556
557static PyTypeObject ZipImporter_Type = {
558 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
559 0,
560 "zipimport.zipimporter",
561 sizeof(ZipImporter),
562 0, /* tp_itemsize */
563 (destructor)zipimporter_dealloc, /* tp_dealloc */
564 0, /* tp_print */
565 0, /* tp_getattr */
566 0, /* tp_setattr */
567 0, /* tp_compare */
568 (reprfunc)zipimporter_repr, /* tp_repr */
569 0, /* tp_as_number */
570 0, /* tp_as_sequence */
571 0, /* tp_as_mapping */
572 0, /* tp_hash */
573 0, /* tp_call */
574 0, /* tp_str */
575 PyObject_GenericGetAttr, /* tp_getattro */
576 0, /* tp_setattro */
577 0, /* tp_as_buffer */
578 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
579 Py_TPFLAGS_HAVE_GC, /* tp_flags */
580 zipimporter_doc, /* tp_doc */
581 zipimporter_traverse, /* tp_traverse */
582 0, /* tp_clear */
583 0, /* tp_richcompare */
584 0, /* tp_weaklistoffset */
585 0, /* tp_iter */
586 0, /* tp_iternext */
587 zipimporter_methods, /* tp_methods */
588 zipimporter_members, /* tp_members */
589 0, /* tp_getset */
590 0, /* tp_base */
591 0, /* tp_dict */
592 0, /* tp_descr_get */
593 0, /* tp_descr_set */
594 0, /* tp_dictoffset */
595 (initproc)zipimporter_init, /* tp_init */
596 PyType_GenericAlloc, /* tp_alloc */
597 PyType_GenericNew, /* tp_new */
598 PyObject_GC_Del, /* tp_free */
599};
600
601
602/* implementation */
603
/* Decode the first two bytes of 'buf' as a little-endian signed 16-bit
   value and return it as an int.  This partially reimplements
   marshal.c:r_short(). */
static int
get_short(unsigned char *buf)
{
    short value = (short)(buf[0] | (buf[1] << 8));

    /* Sign-extension, in case short greater than 16 bits */
    value |= -(value & 0x8000);
    return value;
}
617
/* Decode the first four bytes of 'buf' as a little-endian 32-bit value
   and return it as a long.  This partially reimplements
   marshal.c:r_long(). */
static long
get_long(unsigned char *buf)
{
    long value = (long)buf[0]
               | ((long)buf[1] << 8)
               | ((long)buf[2] << 16)
               | ((long)buf[3] << 24);

#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    value |= -(value & 0x80000000L);
#endif
    return value;
}
634
635/*
636 read_directory(archive) -> files dict (new reference)
637
638 Given a path to a Zip archive, build a dict, mapping file names
639 (local to the archive, using SEP as a separator) to toc entries.
640
641 A toc_entry is a tuple:
642
643 (compress, # compression kind; 0 for uncompressed
644 data_size, # size of compressed data on disk
645 file_size, # size of decompressed data
646 file_offset, # offset of file header from start of archive
647 time, # mod time of file (in dos format)
648 date, # mod data of file (in dos format)
649 crc, # crc checksum of the data
650 )
651
652 Directories can be recognized by the trailing SEP in the name,
653 data_size and file_offset are 0.
654*/
655static PyObject *
656read_directory(char *archive)
657{
658 PyObject *files = NULL;
659 FILE *fp;
660 long compress, crc, data_size, file_size, file_offset, date, time;
661 long header_offset, name_size, header_size, header_end;
662 long i, l, length, count;
663 char path[MAXPATHLEN + 5];
664 char name[MAXPATHLEN + 5];
665 char *p, endof_central_dir[22];
666
667 if (strlen(archive) > MAXPATHLEN) {
668 PyErr_SetString(PyExc_OverflowError,
669 "Zip path name is too long");
670 return NULL;
671 }
672 strcpy(path, archive);
673
674 fp = fopen(archive, "rb");
675 if (fp == NULL) {
676 PyErr_Format(ZipImportError, "can't open Zip file: "
677 "'%.200s'", archive);
678 return NULL;
679 }
680 fseek(fp, -22, 2); /* Seek from end of file */
681 header_end = ftell(fp);
682 if (fread(endof_central_dir, 1, 22, fp) != 22) {
683 fclose(fp);
684 PyErr_Format(ZipImportError, "can't read Zip file: "
685 "'%.200s'", archive);
686 return NULL;
687 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000688 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000689 /* Bad: End of Central Dir signature */
690 fclose(fp);
691 PyErr_Format(ZipImportError, "not a Zip file: "
692 "'%.200s'", archive);
693 return NULL;
694 }
695
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000696 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Just van Rossum52e14d62002-12-30 22:08:05 +0000697
698 files = PyDict_New();
699 if (files == NULL)
700 goto error;
701
702 length = (long)strlen(path);
703 path[length] = SEP;
704
705 /* Start of Central Directory */
706 count = 0;
707 for (;;) {
708 PyObject *t;
709 int err;
710
711 fseek(fp, header_offset, 0); /* Start of file header */
712 l = PyMarshal_ReadLongFromFile(fp);
713 if (l != 0x02014B50)
714 break; /* Bad: Central Dir File Header */
715 fseek(fp, header_offset + 10, 0);
716 compress = PyMarshal_ReadShortFromFile(fp);
717 time = PyMarshal_ReadShortFromFile(fp);
718 date = PyMarshal_ReadShortFromFile(fp);
719 crc = PyMarshal_ReadLongFromFile(fp);
720 data_size = PyMarshal_ReadLongFromFile(fp);
721 file_size = PyMarshal_ReadLongFromFile(fp);
722 name_size = PyMarshal_ReadShortFromFile(fp);
723 header_size = 46 + name_size +
724 PyMarshal_ReadShortFromFile(fp) +
725 PyMarshal_ReadShortFromFile(fp);
726 fseek(fp, header_offset + 42, 0);
727 file_offset = PyMarshal_ReadLongFromFile(fp);
728 if (name_size > MAXPATHLEN)
729 name_size = MAXPATHLEN;
730
731 p = name;
732 for (i = 0; i < name_size; i++) {
733 *p = (char)getc(fp);
734 if (*p == '/')
735 *p = SEP;
736 p++;
737 }
738 *p = 0; /* Add terminating null byte */
739 header_offset += header_size;
740
741 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
742
743 t = Py_BuildValue("siiiiiii", path, compress, data_size,
744 file_size, file_offset, time, date, crc);
745 if (t == NULL)
746 goto error;
747 err = PyDict_SetItemString(files, name, t);
748 Py_DECREF(t);
749 if (err != 0)
750 goto error;
751 count++;
752 }
753 fclose(fp);
754 if (Py_VerboseFlag)
755 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
756 count, archive);
757 return files;
758error:
759 fclose(fp);
760 Py_XDECREF(files);
761 return NULL;
762}
763
764/* Return the zlib.decompress function object, or NULL if zlib couldn't
765 be imported. The function is cached when found, so subsequent calls
766 don't import zlib again. Returns a *borrowed* reference.
767 XXX This makes zlib.decompress immortal. */
768static PyObject *
769get_decompress_func(void)
770{
771 static PyObject *decompress = NULL;
772
773 if (decompress == NULL) {
774 PyObject *zlib;
775 static int importing_zlib = 0;
776
777 if (importing_zlib != 0)
778 /* Someone has a zlib.py[co] in their Zip file;
779 let's avoid a stack overflow. */
780 return NULL;
781 importing_zlib = 1;
782 zlib = PyImport_ImportModule("zlib"); /* import zlib */
783 importing_zlib = 0;
784 if (zlib != NULL) {
785 decompress = PyObject_GetAttrString(zlib,
786 "decompress");
787 Py_DECREF(zlib);
788 }
789 else
790 PyErr_Clear();
791 if (Py_VerboseFlag)
792 PySys_WriteStderr("# zipimport: zlib %s\n",
793 zlib != NULL ? "available": "UNAVAILABLE");
794 }
795 return decompress;
796}
797
798/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
799 data as a new reference. */
800static PyObject *
801get_data(char *archive, PyObject *toc_entry)
802{
803 PyObject *raw_data, *data = NULL, *decompress;
804 char *buf;
805 FILE *fp;
806 int err, bytes_read = 0;
807 long l;
808 char *datapath;
809 long compress, data_size, file_size, file_offset;
810 long time, date, crc;
811
812 if (!PyArg_ParseTuple(toc_entry, "siiiiiii", &datapath, &compress,
813 &data_size, &file_size, &file_offset, &time,
814 &date, &crc)) {
815 return NULL;
816 }
817
818 fp = fopen(archive, "rb");
819 if (!fp) {
820 PyErr_Format(PyExc_IOError,
821 "zipimport: can not open file %s", archive);
822 return NULL;
823 }
824
825 /* Check to make sure the local file header is correct */
826 fseek(fp, file_offset, 0);
827 l = PyMarshal_ReadLongFromFile(fp);
828 if (l != 0x04034B50) {
829 /* Bad: Local File Header */
830 PyErr_Format(ZipImportError,
831 "bad local file header in %s",
832 archive);
833 fclose(fp);
834 return NULL;
835 }
836 fseek(fp, file_offset + 26, 0);
837 l = 30 + PyMarshal_ReadShortFromFile(fp) +
838 PyMarshal_ReadShortFromFile(fp); /* local header size */
839 file_offset += l; /* Start of file data */
840
841 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
842 data_size : data_size + 1);
843 if (raw_data == NULL) {
844 fclose(fp);
845 return NULL;
846 }
847 buf = PyString_AsString(raw_data);
848
849 err = fseek(fp, file_offset, 0);
850 if (err == 0)
851 bytes_read = fread(buf, 1, data_size, fp);
852 fclose(fp);
853 if (err || bytes_read != data_size) {
854 PyErr_SetString(PyExc_IOError,
855 "zipimport: can't read data");
856 Py_DECREF(raw_data);
857 return NULL;
858 }
859
860 if (compress != 0) {
861 buf[data_size] = 'Z'; /* saw this in zipfile.py */
862 data_size++;
863 }
864 buf[data_size] = '\0';
865
866 if (compress == 0) /* data is not compressed */
867 return raw_data;
868
869 /* Decompress with zlib */
870 decompress = get_decompress_func();
871 if (decompress == NULL) {
872 PyErr_SetString(ZipImportError,
873 "can't decompress data; "
874 "zlib not available");
875 goto error;
876 }
877 data = PyObject_CallFunction(decompress, "Ol", raw_data, -15);
878error:
879 Py_DECREF(raw_data);
880 return data;
881}
882
/* Lenient comparison of two modification times: they count as equal
   when they differ by at most one second.  The mtime kept in the archive
   has a lower precision (dostime only stores even seconds) than the one
   stored in a .pyc, hence the tolerance. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return ((delta < 0) ? -delta : delta) <= 1;
}
895
896/* Given the contents of a .py[co] file in a buffer, unmarshal the data
897 and return the code object. Return None if it the magic word doesn't
898 match (we do this instead of raising an exception as we fall back
899 to .py if available and we don't want to mask other errors).
900 Returns a new reference. */
901static PyObject *
902unmarshal_code(char *pathname, PyObject *data, time_t mtime)
903{
904 PyObject *code;
905 char *buf = PyString_AsString(data);
906 int size = PyString_Size(data);
907
908 if (size <= 9) {
909 PyErr_SetString(ZipImportError,
910 "bad pyc data");
911 return NULL;
912 }
913
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000914 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000915 if (Py_VerboseFlag)
916 PySys_WriteStderr("# %s has bad magic\n",
917 pathname);
918 Py_INCREF(Py_None);
919 return Py_None; /* signal caller to try alternative */
920 }
921
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000922 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4), mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000923 if (Py_VerboseFlag)
924 PySys_WriteStderr("# %s has bad mtime\n",
925 pathname);
926 Py_INCREF(Py_None);
927 return Py_None; /* signal caller to try alternative */
928 }
929
930 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
931 if (code == NULL)
932 return NULL;
933 if (!PyCode_Check(code)) {
934 Py_DECREF(code);
935 PyErr_Format(PyExc_TypeError,
936 "compiled module %.200s is not a code object",
937 pathname);
938 return NULL;
939 }
940 return code;
941}
942
943/* Replace any occurances of "\r\n?" in the input string with "\n".
944 This converts DOS and Mac line endings to Unix line endings.
945 Also append a trailing "\n" to be compatible with
946 PyParser_SimpleParseFile(). Returns a new reference. */
947static PyObject *
948normalize_line_endings(PyObject *source)
949{
950 char *q, *p = PyString_AsString(source);
951 int length = PyString_Size(source) + 1;
952 PyObject *fixed_source;
953
954 fixed_source = PyString_FromStringAndSize(p, length);
955 if (fixed_source == NULL)
956 return NULL;
957
958 q = PyString_AsString(fixed_source);
959 /* replace "\r\n?" by "\n" */
960 for (;;) {
961 if (*p == '\r') {
962 *q++ = '\n';
963 if (*(p + 1) == '\n') {
964 p++;
965 length--;
966 }
967 }
968 else
969 *q++ = *p;
970 if (*p == '\0')
971 break;
972 p++;
973 }
974 *q++ = '\n'; /* add trailing \n */
975 *q = '\0';
976 _PyString_Resize(&fixed_source, length);
977 return fixed_source;
978}
979
980/* Given a string buffer containing Python source code, compile it
981 return and return a code object as a new reference. */
982static PyObject *
983compile_source(char *pathname, PyObject *source)
984{
985 PyObject *code, *fixed_source;
986
987 fixed_source = normalize_line_endings(source);
988 if (fixed_source == NULL)
989 return NULL;
990
991 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
992 Py_file_input);
993 Py_DECREF(fixed_source);
994 return code;
995}
996
/* Convert the DOS-format time and date words found in the Zip archive
   to a time_t that can be compared with the time stamp stored in .pyc
   files.  The broken-down time is handed to mktime() with tm_isdst 0. */
time_t parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* date word: bits 0-4 day, 5-8 month (1-based), 9-15 years
       since 1980 */
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    stm.tm_mday = dosdate & 0x1f;
    /* time word: bits 0-4 seconds/2, 5-10 minutes, 11-15 hours */
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_min = (dostime >> 5) & 0x3f;
    stm.tm_sec = (dostime & 0x1f) * 2;
    stm.tm_isdst = 0; /* wday/yday is ignored */

    return mktime(&stm);
}
1013
1014/* Given a path to a .pyc or .pyo file in the archive, return the
1015 modifictaion time of the matching .py file, or 0 if no source
1016 is available. */
1017static time_t
1018get_mtime_of_source(ZipImporter *self, char *path)
1019{
1020 PyObject *toc_entry;
1021 time_t mtime = 0;
1022 int lastchar = strlen(path) - 1;
1023 char savechar = path[lastchar];
1024 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1025 toc_entry = PyDict_GetItemString(self->files, path);
1026 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1027 PyTuple_Size(toc_entry) == 8) {
1028 /* fetch the time stamp of the .py file for comparison
1029 with an embedded pyc time stamp */
1030 int time, date;
1031 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1032 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1033 mtime = parse_dostime(time, date);
1034 }
1035 path[lastchar] = savechar;
1036 return mtime;
1037}
1038
1039/* Return the code object for the module named by 'fullname' from the
1040 Zip archive as a new reference. */
1041static PyObject *
1042get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1043 time_t mtime, PyObject *toc_entry)
1044{
1045 PyObject *data, *code;
1046 char *modpath;
1047 char *archive = PyString_AsString(self->archive);
1048
1049 if (archive == NULL)
1050 return NULL;
1051
1052 data = get_data(archive, toc_entry);
1053 if (data == NULL)
1054 return NULL;
1055
1056 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1057
1058 if (isbytecode) {
1059 code = unmarshal_code(modpath, data, mtime);
1060 }
1061 else {
1062 code = compile_source(modpath, data);
1063 }
1064 Py_DECREF(data);
1065 return code;
1066}
1067
1068/* Get the code object assoiciated with the module specified by
1069 'fullname'. */
1070static PyObject *
1071get_module_code(ZipImporter *self, char *fullname,
1072 int *p_ispackage, char **p_modpath)
1073{
1074 PyObject *toc_entry;
1075 char *subname, path[MAXPATHLEN + 1];
1076 int len;
1077 struct st_zip_searchorder *zso;
1078
1079 subname = get_subname(fullname);
1080
1081 len = make_filename(PyString_AsString(self->prefix), subname, path);
1082 if (len < 0)
1083 return NULL;
1084
1085 for (zso = zip_searchorder; *zso->suffix; zso++) {
1086 PyObject *code = NULL;
1087
1088 strcpy(path + len, zso->suffix);
1089 if (Py_VerboseFlag > 1)
1090 PySys_WriteStderr("# trying %s%c%s\n",
1091 PyString_AsString(self->archive),
1092 SEP, path);
1093 toc_entry = PyDict_GetItemString(self->files, path);
1094 if (toc_entry != NULL) {
1095 time_t mtime = 0;
1096 int ispackage = zso->type & IS_PACKAGE;
1097 int isbytecode = zso->type & IS_BYTECODE;
1098
1099 if (isbytecode)
1100 mtime = get_mtime_of_source(self, path);
1101 if (p_ispackage != NULL)
1102 *p_ispackage = ispackage;
1103 code = get_code_from_data(self, ispackage,
1104 isbytecode, mtime,
1105 toc_entry);
1106 if (code == Py_None) {
1107 /* bad magic number or non-matching mtime
1108 in byte code, try next */
1109 Py_DECREF(code);
1110 continue;
1111 }
1112 if (code != NULL && p_modpath != NULL)
1113 *p_modpath = PyString_AsString(
1114 PyTuple_GetItem(toc_entry, 0));
1115 return code;
1116 }
1117 }
1118 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1119 return NULL;
1120}
1121
1122
1123/* Module init */
1124
1125PyDoc_STRVAR(zipimport_doc,
1126"zipimport provides support for importing Python modules from Zip archives.\n\
1127\n\
1128This module exports three objects:\n\
1129- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
1130- ZipImporterError: exception raised by zipimporter objects. It's a\n\
1131 subclass of ImportError, so it can be caught as ImportError, too.\n\
1132- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1133 info dicts, as used in zipimporter._files.\n\
1134\n\
1135It is usually not needed to use the zipimport module explicitly; it is\n\
1136used by the builtin import mechanism for sys.path items that are paths\n\
1137to Zip archives.");
1138
1139PyMODINIT_FUNC
1140initzipimport(void)
1141{
1142 PyObject *mod;
1143
1144 if (PyType_Ready(&ZipImporter_Type) < 0)
1145 return;
1146
1147 /* Correct directory separator */
1148 zip_searchorder[0].suffix[0] = SEP;
1149 zip_searchorder[1].suffix[0] = SEP;
1150 zip_searchorder[2].suffix[0] = SEP;
1151 if (Py_OptimizeFlag) {
1152 /* Reverse *.pyc and *.pyo */
1153 struct st_zip_searchorder tmp;
1154 tmp = zip_searchorder[0];
1155 zip_searchorder[0] = zip_searchorder[1];
1156 zip_searchorder[1] = tmp;
1157 tmp = zip_searchorder[3];
1158 zip_searchorder[3] = zip_searchorder[4];
1159 zip_searchorder[4] = tmp;
1160 }
1161
1162 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1163 NULL, PYTHON_API_VERSION);
1164
1165 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1166 PyExc_ImportError, NULL);
1167 if (ZipImportError == NULL)
1168 return;
1169
1170 Py_INCREF(ZipImportError);
1171 if (PyModule_AddObject(mod, "ZipImportError",
1172 ZipImportError) < 0)
1173 return;
1174
1175 Py_INCREF(&ZipImporter_Type);
1176 if (PyModule_AddObject(mod, "zipimporter",
1177 (PyObject *)&ZipImporter_Type) < 0)
1178 return;
1179
1180 zip_directory_cache = PyDict_New();
1181 if (zip_directory_cache == NULL)
1182 return;
1183 Py_INCREF(zip_directory_cache);
1184 if (PyModule_AddObject(mod, "_zip_directory_cache",
1185 zip_directory_cache) < 0)
1186 return;
1187}