blob: 3e376562f3376cce005ef13e2fb319469e9043af [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0     /* plain source file; no flag bits set */
#define IS_BYTECODE 0x1     /* entry holds compiled bytecode */
#define IS_PACKAGE  0x2     /* entry is a package __init__ file */
11
/* One entry of the module search order: a filename suffix together with
   the IS_* flags describing the kind of file that suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix, "/__init__.pyc", is 13 chars + NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
/* True when op is an instance of ZipImporter_Type or of a subtype. */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000095#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000096 struct stat statbuf;
97 int rv;
98
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
106 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000107#else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
114 }
115#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000116 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000117 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
124 }
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
135 }
136 else
137 Py_INCREF(files);
138 self->files = files;
139 }
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
143 }
144
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
154 }
155 }
156
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
160
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
164
165 return 0;
166}
167
168/* GC support. */
169static int
170zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171{
172 ZipImporter *self = (ZipImporter *)obj;
173 int err;
174
175 if (self->files != NULL) {
176 err = visit(self->files, arg);
177 if (err)
178 return err;
179 }
180 return 0;
181}
182
183static void
184zipimporter_dealloc(ZipImporter *self)
185{
186 PyObject_GC_UnTrack(self);
187 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000188 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000189 Py_XDECREF(self->files);
190 self->ob_type->tp_free((PyObject *)self);
191}
192
193static PyObject *
194zipimporter_repr(ZipImporter *self)
195{
196 char buf[500];
197 char *archive = "???";
198 char *prefix = "";
199
200 if (self->archive != NULL && PyString_Check(self->archive))
201 archive = PyString_AsString(self->archive);
202 if (self->prefix != NULL && PyString_Check(self->prefix))
203 prefix = PyString_AsString(self->prefix);
204 if (prefix != NULL && *prefix)
205 PyOS_snprintf(buf, sizeof(buf),
206 "<zipimporter object \"%.300s%c%.150s\">",
207 archive, SEP, prefix);
208 else
209 PyOS_snprintf(buf, sizeof(buf),
210 "<zipimporter object \"%.300s\">",
211 archive);
212 return PyString_FromString(buf);
213}
214
/* Return the last dotted component of fullname, i.e.
   fullname.split(".")[-1].  The result points into fullname. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
226
227/* Given a (sub)modulename, write the potential file path in the
228 archive (without extension) to the path buffer. Return the
229 length of the resulting string. */
230static int
231make_filename(char *prefix, char *name, char *path)
232{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000233 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000234 char *p;
235
236 len = strlen(prefix);
237
238 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
239 if (len + strlen(name) + 13 >= MAXPATHLEN) {
240 PyErr_SetString(ZipImportError, "path too long");
241 return -1;
242 }
243
244 strcpy(path, prefix);
245 strcpy(path + len, name);
246 for (p = path + len; *p; p++) {
247 if (*p == '.')
248 *p = SEP;
249 }
250 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000251 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000252 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000253}
254
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ file) */
};
261
262/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000263static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000264get_module_info(ZipImporter *self, char *fullname)
265{
266 char *subname, path[MAXPATHLEN + 1];
267 int len;
268 struct st_zip_searchorder *zso;
269
270 subname = get_subname(fullname);
271
272 len = make_filename(PyString_AsString(self->prefix), subname, path);
273 if (len < 0)
274 return MI_ERROR;
275
276 for (zso = zip_searchorder; *zso->suffix; zso++) {
277 strcpy(path + len, zso->suffix);
278 if (PyDict_GetItemString(self->files, path) != NULL) {
279 if (zso->type & IS_PACKAGE)
280 return MI_PACKAGE;
281 else
282 return MI_MODULE;
283 }
284 }
285 return MI_NOT_FOUND;
286}
287
288/* Check whether we can satisfy the import of the module named by
289 'fullname'. Return self if we can, None if we can't. */
290static PyObject *
291zipimporter_find_module(PyObject *obj, PyObject *args)
292{
293 ZipImporter *self = (ZipImporter *)obj;
294 PyObject *path = NULL;
295 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000296 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000297
298 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
299 &fullname, &path))
300 return NULL;
301
302 mi = get_module_info(self, fullname);
303 if (mi == MI_ERROR)
304 return NULL;
305 if (mi == MI_NOT_FOUND) {
306 Py_INCREF(Py_None);
307 return Py_None;
308 }
309 Py_INCREF(self);
310 return (PyObject *)self;
311}
312
313/* Load and return the module named by 'fullname'. */
314static PyObject *
315zipimporter_load_module(PyObject *obj, PyObject *args)
316{
317 ZipImporter *self = (ZipImporter *)obj;
318 PyObject *code, *mod, *dict;
319 char *fullname, *modpath;
320 int ispackage;
321
322 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
323 &fullname))
324 return NULL;
325
326 code = get_module_code(self, fullname, &ispackage, &modpath);
327 if (code == NULL)
328 return NULL;
329
330 mod = PyImport_AddModule(fullname);
331 if (mod == NULL) {
332 Py_DECREF(code);
333 return NULL;
334 }
335 dict = PyModule_GetDict(mod);
336
337 /* mod.__loader__ = self */
338 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
339 goto error;
340
341 if (ispackage) {
342 /* add __path__ to the module *before* the code gets
343 executed */
344 PyObject *pkgpath, *fullpath;
345 char *prefix = PyString_AsString(self->prefix);
346 char *subname = get_subname(fullname);
347 int err;
348
349 fullpath = PyString_FromFormat("%s%c%s%s",
350 PyString_AsString(self->archive),
351 SEP,
352 *prefix ? prefix : "",
353 subname);
354 if (fullpath == NULL)
355 goto error;
356
357 pkgpath = Py_BuildValue("[O]", fullpath);
358 Py_DECREF(fullpath);
359 if (pkgpath == NULL)
360 goto error;
361 err = PyDict_SetItemString(dict, "__path__", pkgpath);
362 Py_DECREF(pkgpath);
363 if (err != 0)
364 goto error;
365 }
366 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
367 Py_DECREF(code);
368 if (Py_VerboseFlag)
369 PySys_WriteStderr("import %s # loaded from Zip %s\n",
370 fullname, modpath);
371 return mod;
372error:
373 Py_DECREF(code);
374 Py_DECREF(mod);
375 return NULL;
376}
377
378/* Return a bool signifying whether the module is a package or not. */
379static PyObject *
380zipimporter_is_package(PyObject *obj, PyObject *args)
381{
382 ZipImporter *self = (ZipImporter *)obj;
383 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000384 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000385
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000386 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000387 &fullname))
388 return NULL;
389
390 mi = get_module_info(self, fullname);
391 if (mi == MI_ERROR)
392 return NULL;
393 if (mi == MI_NOT_FOUND) {
394 PyErr_Format(ZipImportError, "can't find module '%.200s'",
395 fullname);
396 return NULL;
397 }
398 return PyBool_FromLong(mi == MI_PACKAGE);
399}
400
401static PyObject *
402zipimporter_get_data(PyObject *obj, PyObject *args)
403{
404 ZipImporter *self = (ZipImporter *)obj;
405 char *path;
406#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000407 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000408#endif
409 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000410 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000411
412 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
413 return NULL;
414
415#ifdef ALTSEP
416 if (strlen(path) >= MAXPATHLEN) {
417 PyErr_SetString(ZipImportError, "path too long");
418 return NULL;
419 }
420 strcpy(buf, path);
421 for (p = buf; *p; p++) {
422 if (*p == ALTSEP)
423 *p = SEP;
424 }
425 path = buf;
426#endif
427 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000428 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000429 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
430 path[len] == SEP) {
431 path = path + len + 1;
432 }
433
434 toc_entry = PyDict_GetItemString(self->files, path);
435 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000436 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000437 return NULL;
438 }
439 return get_data(PyString_AsString(self->archive), toc_entry);
440}
441
442static PyObject *
443zipimporter_get_code(PyObject *obj, PyObject *args)
444{
445 ZipImporter *self = (ZipImporter *)obj;
446 char *fullname;
447
448 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
449 return NULL;
450
451 return get_module_code(self, fullname, NULL, NULL);
452}
453
454static PyObject *
455zipimporter_get_source(PyObject *obj, PyObject *args)
456{
457 ZipImporter *self = (ZipImporter *)obj;
458 PyObject *toc_entry;
459 char *fullname, *subname, path[MAXPATHLEN+1];
460 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000461 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000462
463 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
464 return NULL;
465
466 mi = get_module_info(self, fullname);
467 if (mi == MI_ERROR)
468 return NULL;
469 if (mi == MI_NOT_FOUND) {
470 PyErr_Format(ZipImportError, "can't find module '%.200s'",
471 fullname);
472 return NULL;
473 }
474 subname = get_subname(fullname);
475
476 len = make_filename(PyString_AsString(self->prefix), subname, path);
477 if (len < 0)
478 return NULL;
479
480 if (mi == MI_PACKAGE) {
481 path[len] = SEP;
482 strcpy(path + len + 1, "__init__.py");
483 }
484 else
485 strcpy(path + len, ".py");
486
487 toc_entry = PyDict_GetItemString(self->files, path);
488 if (toc_entry != NULL)
489 return get_data(PyString_AsString(self->archive), toc_entry);
490
491 /* we have the module, but no source */
492 Py_INCREF(Py_None);
493 return Py_None;
494}
495
496PyDoc_STRVAR(doc_find_module,
497"find_module(fullname, path=None) -> self or None.\n\
498\n\
499Search for a module specified by 'fullname'. 'fullname' must be the\n\
500fully qualified (dotted) module name. It returns the zipimporter\n\
501instance itself if the module was found, or None if it wasn't.\n\
502The optional 'path' argument is ignored -- it's there for compatibility\n\
503with the importer protocol.");
504
505PyDoc_STRVAR(doc_load_module,
506"load_module(fullname) -> module.\n\
507\n\
508Load the module specified by 'fullname'. 'fullname' must be the\n\
509fully qualified (dotted) module name. It returns the imported\n\
510module, or raises ZipImportError if it wasn't found.");
511
512PyDoc_STRVAR(doc_get_data,
513"get_data(pathname) -> string with file data.\n\
514\n\
515Return the data associated with 'pathname'. Raise IOError if\n\
516the file wasn't found.");
517
518PyDoc_STRVAR(doc_is_package,
519"is_package(fullname) -> bool.\n\
520\n\
521Return True if the module specified by fullname is a package.\n\
522Raise ZipImportError is the module couldn't be found.");
523
524PyDoc_STRVAR(doc_get_code,
525"get_code(fullname) -> code object.\n\
526\n\
527Return the code object for the specified module. Raise ZipImportError\n\
528is the module couldn't be found.");
529
530PyDoc_STRVAR(doc_get_source,
531"get_source(fullname) -> source string.\n\
532\n\
533Return the source code for the specified module. Raise ZipImportError\n\
534is the module couldn't be found, return None if the archive does\n\
535contain the module, but has no source for it.");
536
537static PyMethodDef zipimporter_methods[] = {
538 {"find_module", zipimporter_find_module, METH_VARARGS,
539 doc_find_module},
540 {"load_module", zipimporter_load_module, METH_VARARGS,
541 doc_load_module},
542 {"get_data", zipimporter_get_data, METH_VARARGS,
543 doc_get_data},
544 {"get_code", zipimporter_get_code, METH_VARARGS,
545 doc_get_code},
546 {"get_source", zipimporter_get_source, METH_VARARGS,
547 doc_get_source},
548 {"is_package", zipimporter_is_package, METH_VARARGS,
549 doc_is_package},
550 {NULL, NULL} /* sentinel */
551};
552
553static PyMemberDef zipimporter_members[] = {
554 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
555 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
556 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
557 {NULL}
558};
559
560PyDoc_STRVAR(zipimporter_doc,
561"zipimporter(archivepath) -> zipimporter object\n\
562\n\
563Create a new zipimporter instance. 'archivepath' must be a path to\n\
564a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
565a valid Zip archive.");
566
/* Placeholder used in the static type initializer below: expands to 0
   regardless of its argument. */
#define DEFERRED_ADDRESS(ADDR) 0
568
569static PyTypeObject ZipImporter_Type = {
570 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
571 0,
572 "zipimport.zipimporter",
573 sizeof(ZipImporter),
574 0, /* tp_itemsize */
575 (destructor)zipimporter_dealloc, /* tp_dealloc */
576 0, /* tp_print */
577 0, /* tp_getattr */
578 0, /* tp_setattr */
579 0, /* tp_compare */
580 (reprfunc)zipimporter_repr, /* tp_repr */
581 0, /* tp_as_number */
582 0, /* tp_as_sequence */
583 0, /* tp_as_mapping */
584 0, /* tp_hash */
585 0, /* tp_call */
586 0, /* tp_str */
587 PyObject_GenericGetAttr, /* tp_getattro */
588 0, /* tp_setattro */
589 0, /* tp_as_buffer */
590 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
591 Py_TPFLAGS_HAVE_GC, /* tp_flags */
592 zipimporter_doc, /* tp_doc */
593 zipimporter_traverse, /* tp_traverse */
594 0, /* tp_clear */
595 0, /* tp_richcompare */
596 0, /* tp_weaklistoffset */
597 0, /* tp_iter */
598 0, /* tp_iternext */
599 zipimporter_methods, /* tp_methods */
600 zipimporter_members, /* tp_members */
601 0, /* tp_getset */
602 0, /* tp_base */
603 0, /* tp_dict */
604 0, /* tp_descr_get */
605 0, /* tp_descr_set */
606 0, /* tp_dictoffset */
607 (initproc)zipimporter_init, /* tp_init */
608 PyType_GenericAlloc, /* tp_alloc */
609 PyType_GenericNew, /* tp_new */
610 PyObject_GC_Del, /* tp_free */
611};
612
613
614/* implementation */
615
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  This partially reimplements
   marshal.c:r_long().

   The value is assembled in an unsigned long so that a set top bit in
   buf[3] never causes a signed left shift into the sign bit.  When
   long is wider than 32 bits (SIZEOF_LONG > 4) the 32-bit value is
   sign-extended, so the result is the same signed quantity on every
   platform. */
static long
get_long(unsigned char *buf)
{
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    if (bits & 0x80000000UL)
        bits |= ~0xFFFFFFFFUL;
#endif
    return (long)bits;
}
632
633/*
634 read_directory(archive) -> files dict (new reference)
635
636 Given a path to a Zip archive, build a dict, mapping file names
637 (local to the archive, using SEP as a separator) to toc entries.
638
639 A toc_entry is a tuple:
640
Fred Drakef5b7fd22005-11-11 19:34:56 +0000641 (__file__, # value to use for __file__, available for all files
642 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000643 data_size, # size of compressed data on disk
644 file_size, # size of decompressed data
645 file_offset, # offset of file header from start of archive
646 time, # mod time of file (in dos format)
647 date, # mod data of file (in dos format)
648 crc, # crc checksum of the data
649 )
650
651 Directories can be recognized by the trailing SEP in the name,
652 data_size and file_offset are 0.
653*/
654static PyObject *
655read_directory(char *archive)
656{
657 PyObject *files = NULL;
658 FILE *fp;
659 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000660 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000661 long i, l, count;
662 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000663 char path[MAXPATHLEN + 5];
664 char name[MAXPATHLEN + 5];
665 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000666 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000667
668 if (strlen(archive) > MAXPATHLEN) {
669 PyErr_SetString(PyExc_OverflowError,
670 "Zip path name is too long");
671 return NULL;
672 }
673 strcpy(path, archive);
674
675 fp = fopen(archive, "rb");
676 if (fp == NULL) {
677 PyErr_Format(ZipImportError, "can't open Zip file: "
678 "'%.200s'", archive);
679 return NULL;
680 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000681 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000682 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000683 if (fread(endof_central_dir, 1, 22, fp) != 22) {
684 fclose(fp);
685 PyErr_Format(ZipImportError, "can't read Zip file: "
686 "'%.200s'", archive);
687 return NULL;
688 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000689 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000690 /* Bad: End of Central Dir signature */
691 fclose(fp);
692 PyErr_Format(ZipImportError, "not a Zip file: "
693 "'%.200s'", archive);
694 return NULL;
695 }
696
Thomas Heller354e3d92003-07-22 18:10:15 +0000697 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000698 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000699 arc_offset = header_position - header_offset - header_size;
700 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000701
702 files = PyDict_New();
703 if (files == NULL)
704 goto error;
705
706 length = (long)strlen(path);
707 path[length] = SEP;
708
709 /* Start of Central Directory */
710 count = 0;
711 for (;;) {
712 PyObject *t;
713 int err;
714
715 fseek(fp, header_offset, 0); /* Start of file header */
716 l = PyMarshal_ReadLongFromFile(fp);
717 if (l != 0x02014B50)
718 break; /* Bad: Central Dir File Header */
719 fseek(fp, header_offset + 10, 0);
720 compress = PyMarshal_ReadShortFromFile(fp);
721 time = PyMarshal_ReadShortFromFile(fp);
722 date = PyMarshal_ReadShortFromFile(fp);
723 crc = PyMarshal_ReadLongFromFile(fp);
724 data_size = PyMarshal_ReadLongFromFile(fp);
725 file_size = PyMarshal_ReadLongFromFile(fp);
726 name_size = PyMarshal_ReadShortFromFile(fp);
727 header_size = 46 + name_size +
728 PyMarshal_ReadShortFromFile(fp) +
729 PyMarshal_ReadShortFromFile(fp);
730 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000731 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000732 if (name_size > MAXPATHLEN)
733 name_size = MAXPATHLEN;
734
735 p = name;
736 for (i = 0; i < name_size; i++) {
737 *p = (char)getc(fp);
738 if (*p == '/')
739 *p = SEP;
740 p++;
741 }
742 *p = 0; /* Add terminating null byte */
743 header_offset += header_size;
744
745 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
746
747 t = Py_BuildValue("siiiiiii", path, compress, data_size,
748 file_size, file_offset, time, date, crc);
749 if (t == NULL)
750 goto error;
751 err = PyDict_SetItemString(files, name, t);
752 Py_DECREF(t);
753 if (err != 0)
754 goto error;
755 count++;
756 }
757 fclose(fp);
758 if (Py_VerboseFlag)
759 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
760 count, archive);
761 return files;
762error:
763 fclose(fp);
764 Py_XDECREF(files);
765 return NULL;
766}
767
768/* Return the zlib.decompress function object, or NULL if zlib couldn't
769 be imported. The function is cached when found, so subsequent calls
770 don't import zlib again. Returns a *borrowed* reference.
771 XXX This makes zlib.decompress immortal. */
772static PyObject *
773get_decompress_func(void)
774{
775 static PyObject *decompress = NULL;
776
777 if (decompress == NULL) {
778 PyObject *zlib;
779 static int importing_zlib = 0;
780
781 if (importing_zlib != 0)
782 /* Someone has a zlib.py[co] in their Zip file;
783 let's avoid a stack overflow. */
784 return NULL;
785 importing_zlib = 1;
786 zlib = PyImport_ImportModule("zlib"); /* import zlib */
787 importing_zlib = 0;
788 if (zlib != NULL) {
789 decompress = PyObject_GetAttrString(zlib,
790 "decompress");
791 Py_DECREF(zlib);
792 }
793 else
794 PyErr_Clear();
795 if (Py_VerboseFlag)
796 PySys_WriteStderr("# zipimport: zlib %s\n",
797 zlib != NULL ? "available": "UNAVAILABLE");
798 }
799 return decompress;
800}
801
802/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
803 data as a new reference. */
804static PyObject *
805get_data(char *archive, PyObject *toc_entry)
806{
807 PyObject *raw_data, *data = NULL, *decompress;
808 char *buf;
809 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000810 int err;
811 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000812 long l;
813 char *datapath;
814 long compress, data_size, file_size, file_offset;
815 long time, date, crc;
816
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000817 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000818 &data_size, &file_size, &file_offset, &time,
819 &date, &crc)) {
820 return NULL;
821 }
822
823 fp = fopen(archive, "rb");
824 if (!fp) {
825 PyErr_Format(PyExc_IOError,
826 "zipimport: can not open file %s", archive);
827 return NULL;
828 }
829
830 /* Check to make sure the local file header is correct */
831 fseek(fp, file_offset, 0);
832 l = PyMarshal_ReadLongFromFile(fp);
833 if (l != 0x04034B50) {
834 /* Bad: Local File Header */
835 PyErr_Format(ZipImportError,
836 "bad local file header in %s",
837 archive);
838 fclose(fp);
839 return NULL;
840 }
841 fseek(fp, file_offset + 26, 0);
842 l = 30 + PyMarshal_ReadShortFromFile(fp) +
843 PyMarshal_ReadShortFromFile(fp); /* local header size */
844 file_offset += l; /* Start of file data */
845
846 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
847 data_size : data_size + 1);
848 if (raw_data == NULL) {
849 fclose(fp);
850 return NULL;
851 }
852 buf = PyString_AsString(raw_data);
853
854 err = fseek(fp, file_offset, 0);
855 if (err == 0)
856 bytes_read = fread(buf, 1, data_size, fp);
857 fclose(fp);
858 if (err || bytes_read != data_size) {
859 PyErr_SetString(PyExc_IOError,
860 "zipimport: can't read data");
861 Py_DECREF(raw_data);
862 return NULL;
863 }
864
865 if (compress != 0) {
866 buf[data_size] = 'Z'; /* saw this in zipfile.py */
867 data_size++;
868 }
869 buf[data_size] = '\0';
870
871 if (compress == 0) /* data is not compressed */
872 return raw_data;
873
874 /* Decompress with zlib */
875 decompress = get_decompress_func();
876 if (decompress == NULL) {
877 PyErr_SetString(ZipImportError,
878 "can't decompress data; "
879 "zlib not available");
880 goto error;
881 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000882 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000883error:
884 Py_DECREF(raw_data);
885 return data;
886}
887
/* Lenient date/time comparison: two time stamps count as equal when
   they differ by at most one second.  This is needed because the mtime
   kept in the archive has a lower precision (dostime only stores even
   seconds) than the mtime stored in a .pyc. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
900
901/* Given the contents of a .py[co] file in a buffer, unmarshal the data
902 and return the code object. Return None if it the magic word doesn't
903 match (we do this instead of raising an exception as we fall back
904 to .py if available and we don't want to mask other errors).
905 Returns a new reference. */
906static PyObject *
907unmarshal_code(char *pathname, PyObject *data, time_t mtime)
908{
909 PyObject *code;
910 char *buf = PyString_AsString(data);
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000911 Py_ssize_t size = PyString_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000912
913 if (size <= 9) {
914 PyErr_SetString(ZipImportError,
915 "bad pyc data");
916 return NULL;
917 }
918
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000919 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000920 if (Py_VerboseFlag)
921 PySys_WriteStderr("# %s has bad magic\n",
922 pathname);
923 Py_INCREF(Py_None);
924 return Py_None; /* signal caller to try alternative */
925 }
926
Just van Rossum9a3129c2003-01-03 11:18:56 +0000927 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
928 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000929 if (Py_VerboseFlag)
930 PySys_WriteStderr("# %s has bad mtime\n",
931 pathname);
932 Py_INCREF(Py_None);
933 return Py_None; /* signal caller to try alternative */
934 }
935
936 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
937 if (code == NULL)
938 return NULL;
939 if (!PyCode_Check(code)) {
940 Py_DECREF(code);
941 PyErr_Format(PyExc_TypeError,
942 "compiled module %.200s is not a code object",
943 pathname);
944 return NULL;
945 }
946 return code;
947}
948
949/* Replace any occurances of "\r\n?" in the input string with "\n".
950 This converts DOS and Mac line endings to Unix line endings.
951 Also append a trailing "\n" to be compatible with
952 PyParser_SimpleParseFile(). Returns a new reference. */
953static PyObject *
954normalize_line_endings(PyObject *source)
955{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000956 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000957 PyObject *fixed_source;
958
Just van Rossum9a3129c2003-01-03 11:18:56 +0000959 /* one char extra for trailing \n and one for terminating \0 */
Anthony Baxterd6495b52006-04-12 04:29:01 +0000960 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 if (buf == NULL) {
962 PyErr_SetString(PyExc_MemoryError,
963 "zipimport: no memory to allocate "
964 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000966 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000968 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 if (*p == '\r') {
970 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 }
974 else
975 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000976 }
977 *q++ = '\n'; /* add trailing \n */
978 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000979 fixed_source = PyString_FromString(buf);
980 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000981 return fixed_source;
982}
983
984/* Given a string buffer containing Python source code, compile it
985 return and return a code object as a new reference. */
986static PyObject *
987compile_source(char *pathname, PyObject *source)
988{
989 PyObject *code, *fixed_source;
990
991 fixed_source = normalize_line_endings(source);
992 if (fixed_source == NULL)
993 return NULL;
994
995 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
996 Py_file_input);
997 Py_DECREF(fixed_source);
998 return code;
999}
1000
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   dostime packs seconds/2 (bits 0-4), minutes (bits 5-10) and hours
   (bits 11-15); dosdate packs day (bits 0-4), month (bits 5-8) and
   years since 1980 (bits 9-15).  The broken-down time is interpreted
   as local time by mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm stm;

    /* struct tm may carry extra, platform-specific members; start from
       an all-zero object instead of leaving them uninitialized */
    memset((void *)&stm, '\0', sizeof(stm));

    stm.tm_sec   =  (dostime        & 0x1f) * 2;
    stm.tm_min   =  (dostime >> 5)  & 0x3f;
    stm.tm_hour  =  (dostime >> 11) & 0x1f;
    stm.tm_mday  =   dosdate        & 0x1f;
    stm.tm_mon   = ((dosdate >> 5)  & 0x0f) - 1;
    stm.tm_year  = ((dosdate >> 9)  & 0x7f) + 80;
    stm.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&stm);
}
1018
1019/* Given a path to a .pyc or .pyo file in the archive, return the
1020 modifictaion time of the matching .py file, or 0 if no source
1021 is available. */
1022static time_t
1023get_mtime_of_source(ZipImporter *self, char *path)
1024{
1025 PyObject *toc_entry;
1026 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001027 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001028 char savechar = path[lastchar];
1029 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1030 toc_entry = PyDict_GetItemString(self->files, path);
1031 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1032 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001033 /* fetch the time stamp of the .py file for comparison
1034 with an embedded pyc time stamp */
1035 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001036 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1037 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1038 mtime = parse_dostime(time, date);
1039 }
1040 path[lastchar] = savechar;
1041 return mtime;
1042}
1043
1044/* Return the code object for the module named by 'fullname' from the
1045 Zip archive as a new reference. */
1046static PyObject *
1047get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1048 time_t mtime, PyObject *toc_entry)
1049{
1050 PyObject *data, *code;
1051 char *modpath;
1052 char *archive = PyString_AsString(self->archive);
1053
1054 if (archive == NULL)
1055 return NULL;
1056
1057 data = get_data(archive, toc_entry);
1058 if (data == NULL)
1059 return NULL;
1060
1061 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1062
1063 if (isbytecode) {
1064 code = unmarshal_code(modpath, data, mtime);
1065 }
1066 else {
1067 code = compile_source(modpath, data);
1068 }
1069 Py_DECREF(data);
1070 return code;
1071}
1072
1073/* Get the code object assoiciated with the module specified by
1074 'fullname'. */
1075static PyObject *
1076get_module_code(ZipImporter *self, char *fullname,
1077 int *p_ispackage, char **p_modpath)
1078{
1079 PyObject *toc_entry;
1080 char *subname, path[MAXPATHLEN + 1];
1081 int len;
1082 struct st_zip_searchorder *zso;
1083
1084 subname = get_subname(fullname);
1085
1086 len = make_filename(PyString_AsString(self->prefix), subname, path);
1087 if (len < 0)
1088 return NULL;
1089
1090 for (zso = zip_searchorder; *zso->suffix; zso++) {
1091 PyObject *code = NULL;
1092
1093 strcpy(path + len, zso->suffix);
1094 if (Py_VerboseFlag > 1)
1095 PySys_WriteStderr("# trying %s%c%s\n",
1096 PyString_AsString(self->archive),
1097 SEP, path);
1098 toc_entry = PyDict_GetItemString(self->files, path);
1099 if (toc_entry != NULL) {
1100 time_t mtime = 0;
1101 int ispackage = zso->type & IS_PACKAGE;
1102 int isbytecode = zso->type & IS_BYTECODE;
1103
1104 if (isbytecode)
1105 mtime = get_mtime_of_source(self, path);
1106 if (p_ispackage != NULL)
1107 *p_ispackage = ispackage;
1108 code = get_code_from_data(self, ispackage,
1109 isbytecode, mtime,
1110 toc_entry);
1111 if (code == Py_None) {
1112 /* bad magic number or non-matching mtime
1113 in byte code, try next */
1114 Py_DECREF(code);
1115 continue;
1116 }
1117 if (code != NULL && p_modpath != NULL)
1118 *p_modpath = PyString_AsString(
1119 PyTuple_GetItem(toc_entry, 0));
1120 return code;
1121 }
1122 }
1123 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1124 return NULL;
1125}
1126
1127
1128/* Module init */
1129
/* Module docstring; initzipimport() passes it to Py_InitModule4(). */
1130PyDoc_STRVAR(zipimport_doc,
1131"zipimport provides support for importing Python modules from Zip archives.\n\
1132\n\
1133This module exports three objects:\n\
1134- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001135- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001136 subclass of ImportError, so it can be caught as ImportError, too.\n\
1137- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1138 info dicts, as used in zipimporter._files.\n\
1139\n\
1140It is usually not needed to use the zipimport module explicitly; it is\n\
1141used by the builtin import mechanism for sys.path items that are paths\n\
1142to Zip archives.");
1143
1144PyMODINIT_FUNC
1145initzipimport(void)
1146{
1147 PyObject *mod;
1148
1149 if (PyType_Ready(&ZipImporter_Type) < 0)
1150 return;
1151
1152 /* Correct directory separator */
1153 zip_searchorder[0].suffix[0] = SEP;
1154 zip_searchorder[1].suffix[0] = SEP;
1155 zip_searchorder[2].suffix[0] = SEP;
1156 if (Py_OptimizeFlag) {
1157 /* Reverse *.pyc and *.pyo */
1158 struct st_zip_searchorder tmp;
1159 tmp = zip_searchorder[0];
1160 zip_searchorder[0] = zip_searchorder[1];
1161 zip_searchorder[1] = tmp;
1162 tmp = zip_searchorder[3];
1163 zip_searchorder[3] = zip_searchorder[4];
1164 zip_searchorder[4] = tmp;
1165 }
1166
1167 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1168 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001169 if (mod == NULL)
1170 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001171
1172 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1173 PyExc_ImportError, NULL);
1174 if (ZipImportError == NULL)
1175 return;
1176
1177 Py_INCREF(ZipImportError);
1178 if (PyModule_AddObject(mod, "ZipImportError",
1179 ZipImportError) < 0)
1180 return;
1181
1182 Py_INCREF(&ZipImporter_Type);
1183 if (PyModule_AddObject(mod, "zipimporter",
1184 (PyObject *)&ZipImporter_Type) < 0)
1185 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001186
Just van Rossum52e14d62002-12-30 22:08:05 +00001187 zip_directory_cache = PyDict_New();
1188 if (zip_directory_cache == NULL)
1189 return;
1190 Py_INCREF(zip_directory_cache);
1191 if (PyModule_AddObject(mod, "_zip_directory_cache",
1192 zip_directory_cache) < 0)
1193 return;
1194}