blob: 42bb7a55f3a6251c2f900c0d2a702c404dae1455 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Flags describing the kind of file a zip_searchorder suffix denotes.
   They are OR-ed together in st_zip_searchorder.type.  IS_SOURCE is 0,
   so "source" is simply the absence of IS_BYTECODE. */
#define IS_SOURCE 0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE 0x2
11
/* One row of the module search table: a file-name suffix together with
   the IS_* flags describing what a file with that suffix is. */
struct st_zip_searchorder {
    char suffix[14];    /* fits the longest suffix, "/__init__.pyc", plus NUL */
    int type;           /* combination of IS_SOURCE/IS_BYTECODE/IS_PACKAGE */
};
16
/* zip_searchorder defines how we search for a module in the Zip
   archive: we first search for a package __init__, then for
   non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
   are swapped by initzipimport() if we run in optimized mode. Also,
   '/' is replaced by SEP there.
   The table is terminated by an entry with an empty suffix; the lookup
   loops in this file stop when they reach it. */
static struct st_zip_searchorder zip_searchorder[] = {
    {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
    {"/__init__.py", IS_PACKAGE | IS_SOURCE},
    {".pyc", IS_BYTECODE},
    {".pyo", IS_BYTECODE},
    {".py", IS_SOURCE},
    {"", 0}     /* sentinel */
};
31
32/* zipimporter object definition and support */
33
/* Instance layout of a zipimporter object.  All three members are set
   by zipimporter_init() and released by zipimporter_dealloc(). */
typedef struct _zipimporter ZipImporter;

struct _zipimporter {
    PyObject_HEAD
    PyObject *archive;  /* pathname of the Zip archive */
    PyObject *prefix;   /* file prefix: "a/sub/directory/" */
    PyObject *files;    /* dict with file info {path: toc_entry} */
};
42
/* Exception type raised by this module for import-related failures. */
static PyObject *ZipImportError;
/* Dict mapping an archive pathname to its "files" dict, so that an
   archive's directory is only read once (see zipimporter_init()). */
static PyObject *zip_directory_cache = NULL;

/* forward decls */
static PyObject *read_directory(char *archive);
static PyObject *get_data(char *archive, PyObject *toc_entry);
static PyObject *get_module_code(ZipImporter *self, char *fullname,
                                 int *p_ispackage, char **p_modpath);


/* True if 'op' is a zipimporter instance (or an instance of a subtype). */
#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
95 struct stat statbuf;
96 int rv;
97
98 rv = stat(buf, &statbuf);
99 if (rv == 0) {
100 /* it exists */
101 if (S_ISREG(statbuf.st_mode))
102 /* it's a file */
103 path = buf;
104 break;
105 }
106 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000107 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000108 if (prefix != NULL)
109 *prefix = SEP;
110 if (p == NULL)
111 break;
112 *p = '\0';
113 prefix = p;
114 }
115 if (path != NULL) {
116 PyObject *files;
117 files = PyDict_GetItemString(zip_directory_cache, path);
118 if (files == NULL) {
119 files = read_directory(buf);
120 if (files == NULL)
121 return -1;
122 if (PyDict_SetItemString(zip_directory_cache, path,
123 files) != 0)
124 return -1;
125 }
126 else
127 Py_INCREF(files);
128 self->files = files;
129 }
130 else {
131 PyErr_SetString(ZipImportError, "not a Zip file");
132 return -1;
133 }
134
135 if (prefix == NULL)
136 prefix = "";
137 else {
138 prefix++;
139 len = strlen(prefix);
140 if (prefix[len-1] != SEP) {
141 /* add trailing SEP */
142 prefix[len] = SEP;
143 prefix[len + 1] = '\0';
144 }
145 }
146
147 self->archive = PyString_FromString(buf);
148 if (self->archive == NULL)
149 return -1;
150
151 self->prefix = PyString_FromString(prefix);
152 if (self->prefix == NULL)
153 return -1;
154
155 return 0;
156}
157
158/* GC support. */
159static int
160zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
161{
162 ZipImporter *self = (ZipImporter *)obj;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000163 Py_VISIT(self->files);
Just van Rossum52e14d62002-12-30 22:08:05 +0000164 return 0;
165}
166
167static void
168zipimporter_dealloc(ZipImporter *self)
169{
170 PyObject_GC_UnTrack(self);
171 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000172 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000173 Py_XDECREF(self->files);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000174 Py_Type(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000175}
176
177static PyObject *
178zipimporter_repr(ZipImporter *self)
179{
Just van Rossum52e14d62002-12-30 22:08:05 +0000180 char *archive = "???";
181 char *prefix = "";
182
183 if (self->archive != NULL && PyString_Check(self->archive))
184 archive = PyString_AsString(self->archive);
185 if (self->prefix != NULL && PyString_Check(self->prefix))
186 prefix = PyString_AsString(self->prefix);
187 if (prefix != NULL && *prefix)
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000188 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
189 archive, SEP, prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 else
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000191 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
192 archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000193}
194
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
206
207/* Given a (sub)modulename, write the potential file path in the
208 archive (without extension) to the path buffer. Return the
209 length of the resulting string. */
210static int
211make_filename(char *prefix, char *name, char *path)
212{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000213 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000214 char *p;
215
216 len = strlen(prefix);
217
218 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
219 if (len + strlen(name) + 13 >= MAXPATHLEN) {
220 PyErr_SetString(ZipImportError, "path too long");
221 return -1;
222 }
223
224 strcpy(path, prefix);
225 strcpy(path + len, name);
226 for (p = path + len; *p; p++) {
227 if (*p == '.')
228 *p = SEP;
229 }
230 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000231 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000232 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000233}
234
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed; an exception is set */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found a plain module */
    MI_PACKAGE = 3      /* found a package (__init__ entry) */
};
241
242/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000243static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000244get_module_info(ZipImporter *self, char *fullname)
245{
246 char *subname, path[MAXPATHLEN + 1];
247 int len;
248 struct st_zip_searchorder *zso;
249
250 subname = get_subname(fullname);
251
252 len = make_filename(PyString_AsString(self->prefix), subname, path);
253 if (len < 0)
254 return MI_ERROR;
255
256 for (zso = zip_searchorder; *zso->suffix; zso++) {
257 strcpy(path + len, zso->suffix);
258 if (PyDict_GetItemString(self->files, path) != NULL) {
259 if (zso->type & IS_PACKAGE)
260 return MI_PACKAGE;
261 else
262 return MI_MODULE;
263 }
264 }
265 return MI_NOT_FOUND;
266}
267
268/* Check whether we can satisfy the import of the module named by
269 'fullname'. Return self if we can, None if we can't. */
270static PyObject *
271zipimporter_find_module(PyObject *obj, PyObject *args)
272{
273 ZipImporter *self = (ZipImporter *)obj;
274 PyObject *path = NULL;
275 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000276 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000277
278 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
279 &fullname, &path))
280 return NULL;
281
282 mi = get_module_info(self, fullname);
283 if (mi == MI_ERROR)
284 return NULL;
285 if (mi == MI_NOT_FOUND) {
286 Py_INCREF(Py_None);
287 return Py_None;
288 }
289 Py_INCREF(self);
290 return (PyObject *)self;
291}
292
293/* Load and return the module named by 'fullname'. */
294static PyObject *
295zipimporter_load_module(PyObject *obj, PyObject *args)
296{
297 ZipImporter *self = (ZipImporter *)obj;
298 PyObject *code, *mod, *dict;
299 char *fullname, *modpath;
300 int ispackage;
301
302 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
303 &fullname))
304 return NULL;
305
306 code = get_module_code(self, fullname, &ispackage, &modpath);
307 if (code == NULL)
308 return NULL;
309
310 mod = PyImport_AddModule(fullname);
311 if (mod == NULL) {
312 Py_DECREF(code);
313 return NULL;
314 }
315 dict = PyModule_GetDict(mod);
316
317 /* mod.__loader__ = self */
318 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
319 goto error;
320
321 if (ispackage) {
322 /* add __path__ to the module *before* the code gets
323 executed */
324 PyObject *pkgpath, *fullpath;
325 char *prefix = PyString_AsString(self->prefix);
326 char *subname = get_subname(fullname);
327 int err;
328
329 fullpath = PyString_FromFormat("%s%c%s%s",
330 PyString_AsString(self->archive),
331 SEP,
332 *prefix ? prefix : "",
333 subname);
334 if (fullpath == NULL)
335 goto error;
336
337 pkgpath = Py_BuildValue("[O]", fullpath);
338 Py_DECREF(fullpath);
339 if (pkgpath == NULL)
340 goto error;
341 err = PyDict_SetItemString(dict, "__path__", pkgpath);
342 Py_DECREF(pkgpath);
343 if (err != 0)
344 goto error;
345 }
346 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
347 Py_DECREF(code);
348 if (Py_VerboseFlag)
349 PySys_WriteStderr("import %s # loaded from Zip %s\n",
350 fullname, modpath);
351 return mod;
352error:
353 Py_DECREF(code);
354 Py_DECREF(mod);
355 return NULL;
356}
357
358/* Return a bool signifying whether the module is a package or not. */
359static PyObject *
360zipimporter_is_package(PyObject *obj, PyObject *args)
361{
362 ZipImporter *self = (ZipImporter *)obj;
363 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000364 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000365
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000366 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000367 &fullname))
368 return NULL;
369
370 mi = get_module_info(self, fullname);
371 if (mi == MI_ERROR)
372 return NULL;
373 if (mi == MI_NOT_FOUND) {
374 PyErr_Format(ZipImportError, "can't find module '%.200s'",
375 fullname);
376 return NULL;
377 }
378 return PyBool_FromLong(mi == MI_PACKAGE);
379}
380
381static PyObject *
382zipimporter_get_data(PyObject *obj, PyObject *args)
383{
384 ZipImporter *self = (ZipImporter *)obj;
385 char *path;
386#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000387 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000388#endif
389 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000390 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000391
392 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
393 return NULL;
394
395#ifdef ALTSEP
396 if (strlen(path) >= MAXPATHLEN) {
397 PyErr_SetString(ZipImportError, "path too long");
398 return NULL;
399 }
400 strcpy(buf, path);
401 for (p = buf; *p; p++) {
402 if (*p == ALTSEP)
403 *p = SEP;
404 }
405 path = buf;
406#endif
407 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000408 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000409 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
410 path[len] == SEP) {
411 path = path + len + 1;
412 }
413
414 toc_entry = PyDict_GetItemString(self->files, path);
415 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000416 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000417 return NULL;
418 }
419 return get_data(PyString_AsString(self->archive), toc_entry);
420}
421
422static PyObject *
423zipimporter_get_code(PyObject *obj, PyObject *args)
424{
425 ZipImporter *self = (ZipImporter *)obj;
426 char *fullname;
427
428 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
429 return NULL;
430
431 return get_module_code(self, fullname, NULL, NULL);
432}
433
434static PyObject *
435zipimporter_get_source(PyObject *obj, PyObject *args)
436{
437 ZipImporter *self = (ZipImporter *)obj;
438 PyObject *toc_entry;
439 char *fullname, *subname, path[MAXPATHLEN+1];
440 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000441 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000442
443 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
444 return NULL;
445
446 mi = get_module_info(self, fullname);
447 if (mi == MI_ERROR)
448 return NULL;
449 if (mi == MI_NOT_FOUND) {
450 PyErr_Format(ZipImportError, "can't find module '%.200s'",
451 fullname);
452 return NULL;
453 }
454 subname = get_subname(fullname);
455
456 len = make_filename(PyString_AsString(self->prefix), subname, path);
457 if (len < 0)
458 return NULL;
459
460 if (mi == MI_PACKAGE) {
461 path[len] = SEP;
462 strcpy(path + len + 1, "__init__.py");
463 }
464 else
465 strcpy(path + len, ".py");
466
467 toc_entry = PyDict_GetItemString(self->files, path);
Guido van Rossumad8d3002007-08-03 18:40:49 +0000468 if (toc_entry != NULL) {
469 PyObject *bytes = get_data(PyString_AsString(self->archive), toc_entry);
470 PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
471 Py_XDECREF(bytes);
472 return res;
473 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000474
475 /* we have the module, but no source */
476 Py_INCREF(Py_None);
477 return Py_None;
478}
479
480PyDoc_STRVAR(doc_find_module,
481"find_module(fullname, path=None) -> self or None.\n\
482\n\
483Search for a module specified by 'fullname'. 'fullname' must be the\n\
484fully qualified (dotted) module name. It returns the zipimporter\n\
485instance itself if the module was found, or None if it wasn't.\n\
486The optional 'path' argument is ignored -- it's there for compatibility\n\
487with the importer protocol.");
488
489PyDoc_STRVAR(doc_load_module,
490"load_module(fullname) -> module.\n\
491\n\
492Load the module specified by 'fullname'. 'fullname' must be the\n\
493fully qualified (dotted) module name. It returns the imported\n\
494module, or raises ZipImportError if it wasn't found.");
495
496PyDoc_STRVAR(doc_get_data,
497"get_data(pathname) -> string with file data.\n\
498\n\
499Return the data associated with 'pathname'. Raise IOError if\n\
500the file wasn't found.");
501
502PyDoc_STRVAR(doc_is_package,
503"is_package(fullname) -> bool.\n\
504\n\
505Return True if the module specified by fullname is a package.\n\
506Raise ZipImportError is the module couldn't be found.");
507
508PyDoc_STRVAR(doc_get_code,
509"get_code(fullname) -> code object.\n\
510\n\
511Return the code object for the specified module. Raise ZipImportError\n\
512is the module couldn't be found.");
513
514PyDoc_STRVAR(doc_get_source,
515"get_source(fullname) -> source string.\n\
516\n\
517Return the source code for the specified module. Raise ZipImportError\n\
518is the module couldn't be found, return None if the archive does\n\
519contain the module, but has no source for it.");
520
/* Method table of the zipimporter type: maps each Python-visible
   method name to its C implementation and docstring.  All methods take
   positional arguments only (METH_VARARGS). */
static PyMethodDef zipimporter_methods[] = {
    {"find_module", zipimporter_find_module, METH_VARARGS,
     doc_find_module},
    {"load_module", zipimporter_load_module, METH_VARARGS,
     doc_load_module},
    {"get_data", zipimporter_get_data, METH_VARARGS,
     doc_get_data},
    {"get_code", zipimporter_get_code, METH_VARARGS,
     doc_get_code},
    {"get_source", zipimporter_get_source, METH_VARARGS,
     doc_get_source},
    {"is_package", zipimporter_is_package, METH_VARARGS,
     doc_is_package},
    {NULL, NULL} /* sentinel */
};
536
/* Read-only attributes exposing the instance's struct members:
   .archive, .prefix and ._files. */
static PyMemberDef zipimporter_members[] = {
    {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
    {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
    {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
    {NULL}  /* sentinel */
};
543
/* Docstring of the zipimporter type itself (used as tp_doc). */
PyDoc_STRVAR(zipimporter_doc,
"zipimporter(archivepath) -> zipimporter object\n\
\n\
Create a new zipimporter instance. 'archivepath' must be a path to\n\
a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
a valid Zip archive.");
550
/* Expands to 0 whatever its argument, so the static initializer below
   stores 0 in the type's ob_type slot.
   NOTE(review): presumably the real &PyType_Type is filled in during
   module initialization -- confirm against the init function. */
#define DEFERRED_ADDRESS(ADDR) 0

/* Type object for zipimport.zipimporter: a GC-aware, subclassable type
   whose instances are initialized by zipimporter_init(). */
static PyTypeObject ZipImporter_Type = {
    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
    "zipimport.zipimporter",
    sizeof(ZipImporter),
    0,                                          /* tp_itemsize */
    (destructor)zipimporter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)zipimporter_repr,                 /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                     /* tp_flags */
    zipimporter_doc,                            /* tp_doc */
    zipimporter_traverse,                       /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    zipimporter_methods,                        /* tp_methods */
    zipimporter_members,                        /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    (initproc)zipimporter_init,                 /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    PyType_GenericNew,                          /* tp_new */
    PyObject_GC_Del,                            /* tp_free */
};
595
596
597/* implementation */
598
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed 32-bit
   value.  This partially reimplements marshal.c:r_long().

   The bytes are assembled in unsigned arithmetic and the sign is
   applied by hand, so the result is the same for any width of 'long'
   and no signed shift or negation can overflow. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL)
        /* two's complement: value = -((~bits & 0x7FFFFFFF) + 1) */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    return (long)bits;
}
615
616/*
617 read_directory(archive) -> files dict (new reference)
618
619 Given a path to a Zip archive, build a dict, mapping file names
620 (local to the archive, using SEP as a separator) to toc entries.
621
622 A toc_entry is a tuple:
623
Fred Drakef5b7fd22005-11-11 19:34:56 +0000624 (__file__, # value to use for __file__, available for all files
625 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000626 data_size, # size of compressed data on disk
627 file_size, # size of decompressed data
628 file_offset, # offset of file header from start of archive
629 time, # mod time of file (in dos format)
630 date, # mod data of file (in dos format)
631 crc, # crc checksum of the data
632 )
633
634 Directories can be recognized by the trailing SEP in the name,
635 data_size and file_offset are 0.
636*/
637static PyObject *
638read_directory(char *archive)
639{
640 PyObject *files = NULL;
641 FILE *fp;
642 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000643 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000644 long i, l, count;
645 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000646 char path[MAXPATHLEN + 5];
647 char name[MAXPATHLEN + 5];
648 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000649 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000650
651 if (strlen(archive) > MAXPATHLEN) {
652 PyErr_SetString(PyExc_OverflowError,
653 "Zip path name is too long");
654 return NULL;
655 }
656 strcpy(path, archive);
657
658 fp = fopen(archive, "rb");
659 if (fp == NULL) {
660 PyErr_Format(ZipImportError, "can't open Zip file: "
661 "'%.200s'", archive);
662 return NULL;
663 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000664 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000665 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000666 if (fread(endof_central_dir, 1, 22, fp) != 22) {
667 fclose(fp);
668 PyErr_Format(ZipImportError, "can't read Zip file: "
669 "'%.200s'", archive);
670 return NULL;
671 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000672 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000673 /* Bad: End of Central Dir signature */
674 fclose(fp);
675 PyErr_Format(ZipImportError, "not a Zip file: "
676 "'%.200s'", archive);
677 return NULL;
678 }
679
Thomas Heller354e3d92003-07-22 18:10:15 +0000680 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000681 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000682 arc_offset = header_position - header_offset - header_size;
683 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000684
685 files = PyDict_New();
686 if (files == NULL)
687 goto error;
688
689 length = (long)strlen(path);
690 path[length] = SEP;
691
692 /* Start of Central Directory */
693 count = 0;
694 for (;;) {
695 PyObject *t;
696 int err;
697
698 fseek(fp, header_offset, 0); /* Start of file header */
699 l = PyMarshal_ReadLongFromFile(fp);
700 if (l != 0x02014B50)
701 break; /* Bad: Central Dir File Header */
702 fseek(fp, header_offset + 10, 0);
703 compress = PyMarshal_ReadShortFromFile(fp);
704 time = PyMarshal_ReadShortFromFile(fp);
705 date = PyMarshal_ReadShortFromFile(fp);
706 crc = PyMarshal_ReadLongFromFile(fp);
707 data_size = PyMarshal_ReadLongFromFile(fp);
708 file_size = PyMarshal_ReadLongFromFile(fp);
709 name_size = PyMarshal_ReadShortFromFile(fp);
710 header_size = 46 + name_size +
711 PyMarshal_ReadShortFromFile(fp) +
712 PyMarshal_ReadShortFromFile(fp);
713 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000714 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000715 if (name_size > MAXPATHLEN)
716 name_size = MAXPATHLEN;
717
718 p = name;
719 for (i = 0; i < name_size; i++) {
720 *p = (char)getc(fp);
721 if (*p == '/')
722 *p = SEP;
723 p++;
724 }
725 *p = 0; /* Add terminating null byte */
726 header_offset += header_size;
727
728 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730 t = Py_BuildValue("siiiiiii", path, compress, data_size,
731 file_size, file_offset, time, date, crc);
732 if (t == NULL)
733 goto error;
734 err = PyDict_SetItemString(files, name, t);
735 Py_DECREF(t);
736 if (err != 0)
737 goto error;
738 count++;
739 }
740 fclose(fp);
741 if (Py_VerboseFlag)
742 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743 count, archive);
744 return files;
745error:
746 fclose(fp);
747 Py_XDECREF(files);
748 return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752 be imported. The function is cached when found, so subsequent calls
753 don't import zlib again. Returns a *borrowed* reference.
754 XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758 static PyObject *decompress = NULL;
759
760 if (decompress == NULL) {
761 PyObject *zlib;
762 static int importing_zlib = 0;
763
764 if (importing_zlib != 0)
765 /* Someone has a zlib.py[co] in their Zip file;
766 let's avoid a stack overflow. */
767 return NULL;
768 importing_zlib = 1;
769 zlib = PyImport_ImportModule("zlib"); /* import zlib */
770 importing_zlib = 0;
771 if (zlib != NULL) {
772 decompress = PyObject_GetAttrString(zlib,
773 "decompress");
774 Py_DECREF(zlib);
775 }
776 else
777 PyErr_Clear();
778 if (Py_VerboseFlag)
779 PySys_WriteStderr("# zipimport: zlib %s\n",
780 zlib != NULL ? "available": "UNAVAILABLE");
781 }
782 return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786 data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790 PyObject *raw_data, *data = NULL, *decompress;
791 char *buf;
792 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000793 int err;
794 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000795 long l;
796 char *datapath;
797 long compress, data_size, file_size, file_offset;
798 long time, date, crc;
799
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000800 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000801 &data_size, &file_size, &file_offset, &time,
802 &date, &crc)) {
803 return NULL;
804 }
805
806 fp = fopen(archive, "rb");
807 if (!fp) {
808 PyErr_Format(PyExc_IOError,
809 "zipimport: can not open file %s", archive);
810 return NULL;
811 }
812
813 /* Check to make sure the local file header is correct */
814 fseek(fp, file_offset, 0);
815 l = PyMarshal_ReadLongFromFile(fp);
816 if (l != 0x04034B50) {
817 /* Bad: Local File Header */
818 PyErr_Format(ZipImportError,
819 "bad local file header in %s",
820 archive);
821 fclose(fp);
822 return NULL;
823 }
824 fseek(fp, file_offset + 26, 0);
825 l = 30 + PyMarshal_ReadShortFromFile(fp) +
826 PyMarshal_ReadShortFromFile(fp); /* local header size */
827 file_offset += l; /* Start of file data */
828
829 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
830 data_size : data_size + 1);
831 if (raw_data == NULL) {
832 fclose(fp);
833 return NULL;
834 }
835 buf = PyString_AsString(raw_data);
836
837 err = fseek(fp, file_offset, 0);
838 if (err == 0)
839 bytes_read = fread(buf, 1, data_size, fp);
840 fclose(fp);
841 if (err || bytes_read != data_size) {
842 PyErr_SetString(PyExc_IOError,
843 "zipimport: can't read data");
844 Py_DECREF(raw_data);
845 return NULL;
846 }
847
848 if (compress != 0) {
849 buf[data_size] = 'Z'; /* saw this in zipfile.py */
850 data_size++;
851 }
852 buf[data_size] = '\0';
853
Guido van Rossumad8d3002007-08-03 18:40:49 +0000854 if (compress == 0) { /* data is not compressed */
Guido van Rossum76f2b242007-08-17 14:33:37 +0000855 data = PyBytes_FromStringAndSize(buf, data_size);
856 Py_DECREF(raw_data);
857 return data;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000858 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000859
860 /* Decompress with zlib */
861 decompress = get_decompress_func();
862 if (decompress == NULL) {
863 PyErr_SetString(ZipImportError,
864 "can't decompress data; "
865 "zlib not available");
866 goto error;
867 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000868 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000869error:
870 Py_DECREF(raw_data);
871 return data;
872}
873
/* Lenient date/time comparison function. The precision of the mtime
   in the archive is lower than the mtime stored in a .pyc: we
   must allow a difference of at most one second.
   Returns nonzero when the two times are considered equal. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    /* dostime only stores even seconds, so be lenient */
    return (delta < 0 ? -delta : delta) <= 1;
}
886
887/* Given the contents of a .py[co] file in a buffer, unmarshal the data
888 and return the code object. Return None if it the magic word doesn't
889 match (we do this instead of raising an exception as we fall back
890 to .py if available and we don't want to mask other errors).
891 Returns a new reference. */
892static PyObject *
893unmarshal_code(char *pathname, PyObject *data, time_t mtime)
894{
895 PyObject *code;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000896 char *buf = PyBytes_AsString(data);
897 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000898
899 if (size <= 9) {
900 PyErr_SetString(ZipImportError,
901 "bad pyc data");
902 return NULL;
903 }
904
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000905 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000906 if (Py_VerboseFlag)
907 PySys_WriteStderr("# %s has bad magic\n",
908 pathname);
909 Py_INCREF(Py_None);
910 return Py_None; /* signal caller to try alternative */
911 }
912
Just van Rossum9a3129c2003-01-03 11:18:56 +0000913 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
914 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000915 if (Py_VerboseFlag)
916 PySys_WriteStderr("# %s has bad mtime\n",
917 pathname);
918 Py_INCREF(Py_None);
919 return Py_None; /* signal caller to try alternative */
920 }
921
922 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
923 if (code == NULL)
924 return NULL;
925 if (!PyCode_Check(code)) {
926 Py_DECREF(code);
927 PyErr_Format(PyExc_TypeError,
928 "compiled module %.200s is not a code object",
929 pathname);
930 return NULL;
931 }
932 return code;
933}
934
935/* Replace any occurances of "\r\n?" in the input string with "\n".
936 This converts DOS and Mac line endings to Unix line endings.
937 Also append a trailing "\n" to be compatible with
938 PyParser_SimpleParseFile(). Returns a new reference. */
939static PyObject *
940normalize_line_endings(PyObject *source)
941{
Guido van Rossumad8d3002007-08-03 18:40:49 +0000942 char *buf, *q, *p = PyBytes_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000943 PyObject *fixed_source;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000944 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000945
Guido van Rossumad8d3002007-08-03 18:40:49 +0000946 if (!p) {
947 return PyBytes_FromStringAndSize("\n\0", 2);
948 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000949
Just van Rossum9a3129c2003-01-03 11:18:56 +0000950 /* one char extra for trailing \n and one for terminating \0 */
Guido van Rossumad8d3002007-08-03 18:40:49 +0000951 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000952 if (buf == NULL) {
953 PyErr_SetString(PyExc_MemoryError,
954 "zipimport: no memory to allocate "
955 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000956 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000957 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000958 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000959 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000960 if (*p == '\r') {
961 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000962 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 }
965 else
966 *q++ = *p;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000967 len++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 }
969 *q++ = '\n'; /* add trailing \n */
970 *q = '\0';
Guido van Rossumad8d3002007-08-03 18:40:49 +0000971 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000972 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 return fixed_source;
974}
975
976/* Given a string buffer containing Python source code, compile it
977 return and return a code object as a new reference. */
978static PyObject *
979compile_source(char *pathname, PyObject *source)
980{
981 PyObject *code, *fixed_source;
982
983 fixed_source = normalize_line_endings(source);
984 if (fixed_source == NULL)
985 return NULL;
986
Guido van Rossumad8d3002007-08-03 18:40:49 +0000987 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
Just van Rossum52e14d62002-12-30 22:08:05 +0000988 Py_file_input);
989 Py_DECREF(fixed_source);
990 return code;
991}
992
/* Convert the packed DOS time and date values found in the Zip
   archive to a time_t, so that the result is compatible with the
   time stamp stored in .pyc files.  The broken-down fields are
   interpreted by mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    /* date word: 7 bits year (offset from 1980), 4 bits month
       (1-based), 5 bits day of month */
    fields.tm_year = 80 + ((dosdate >> 9) & 0x7f);
    fields.tm_mon = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    /* time word: 5 bits hour, 6 bits minute, 5 bits of seconds
       counted in units of two */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min = (dostime >> 5) & 0x3f;
    fields.tm_sec = 2 * (dostime & 0x1f);

    /* let mktime() decide about DST; wday/yday is ignored */
    fields.tm_isdst = -1;

    return mktime(&fields);
}
1010
1011/* Given a path to a .pyc or .pyo file in the archive, return the
1012 modifictaion time of the matching .py file, or 0 if no source
1013 is available. */
1014static time_t
1015get_mtime_of_source(ZipImporter *self, char *path)
1016{
1017 PyObject *toc_entry;
1018 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001019 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001020 char savechar = path[lastchar];
1021 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1022 toc_entry = PyDict_GetItemString(self->files, path);
1023 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1024 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001025 /* fetch the time stamp of the .py file for comparison
1026 with an embedded pyc time stamp */
1027 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001028 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1029 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1030 mtime = parse_dostime(time, date);
1031 }
1032 path[lastchar] = savechar;
1033 return mtime;
1034}
1035
1036/* Return the code object for the module named by 'fullname' from the
1037 Zip archive as a new reference. */
1038static PyObject *
1039get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1040 time_t mtime, PyObject *toc_entry)
1041{
1042 PyObject *data, *code;
1043 char *modpath;
1044 char *archive = PyString_AsString(self->archive);
1045
1046 if (archive == NULL)
1047 return NULL;
1048
1049 data = get_data(archive, toc_entry);
1050 if (data == NULL)
1051 return NULL;
1052
1053 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1054
1055 if (isbytecode) {
1056 code = unmarshal_code(modpath, data, mtime);
1057 }
1058 else {
1059 code = compile_source(modpath, data);
1060 }
1061 Py_DECREF(data);
1062 return code;
1063}
1064
1065/* Get the code object assoiciated with the module specified by
1066 'fullname'. */
1067static PyObject *
1068get_module_code(ZipImporter *self, char *fullname,
1069 int *p_ispackage, char **p_modpath)
1070{
1071 PyObject *toc_entry;
1072 char *subname, path[MAXPATHLEN + 1];
1073 int len;
1074 struct st_zip_searchorder *zso;
1075
1076 subname = get_subname(fullname);
1077
1078 len = make_filename(PyString_AsString(self->prefix), subname, path);
1079 if (len < 0)
1080 return NULL;
1081
1082 for (zso = zip_searchorder; *zso->suffix; zso++) {
1083 PyObject *code = NULL;
1084
1085 strcpy(path + len, zso->suffix);
1086 if (Py_VerboseFlag > 1)
1087 PySys_WriteStderr("# trying %s%c%s\n",
1088 PyString_AsString(self->archive),
1089 SEP, path);
1090 toc_entry = PyDict_GetItemString(self->files, path);
1091 if (toc_entry != NULL) {
1092 time_t mtime = 0;
1093 int ispackage = zso->type & IS_PACKAGE;
1094 int isbytecode = zso->type & IS_BYTECODE;
1095
1096 if (isbytecode)
1097 mtime = get_mtime_of_source(self, path);
1098 if (p_ispackage != NULL)
1099 *p_ispackage = ispackage;
1100 code = get_code_from_data(self, ispackage,
1101 isbytecode, mtime,
1102 toc_entry);
1103 if (code == Py_None) {
1104 /* bad magic number or non-matching mtime
1105 in byte code, try next */
1106 Py_DECREF(code);
1107 continue;
1108 }
1109 if (code != NULL && p_modpath != NULL)
1110 *p_modpath = PyString_AsString(
1111 PyTuple_GetItem(toc_entry, 0));
1112 return code;
1113 }
1114 }
1115 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1116 return NULL;
1117}
1118
1119
1120/* Module init */
1121
1122PyDoc_STRVAR(zipimport_doc,
1123"zipimport provides support for importing Python modules from Zip archives.\n\
1124\n\
1125This module exports three objects:\n\
1126- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001127- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001128 subclass of ImportError, so it can be caught as ImportError, too.\n\
1129- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1130 info dicts, as used in zipimporter._files.\n\
1131\n\
1132It is usually not needed to use the zipimport module explicitly; it is\n\
1133used by the builtin import mechanism for sys.path items that are paths\n\
1134to Zip archives.");
1135
1136PyMODINIT_FUNC
1137initzipimport(void)
1138{
1139 PyObject *mod;
1140
1141 if (PyType_Ready(&ZipImporter_Type) < 0)
1142 return;
1143
1144 /* Correct directory separator */
1145 zip_searchorder[0].suffix[0] = SEP;
1146 zip_searchorder[1].suffix[0] = SEP;
1147 zip_searchorder[2].suffix[0] = SEP;
1148 if (Py_OptimizeFlag) {
1149 /* Reverse *.pyc and *.pyo */
1150 struct st_zip_searchorder tmp;
1151 tmp = zip_searchorder[0];
1152 zip_searchorder[0] = zip_searchorder[1];
1153 zip_searchorder[1] = tmp;
1154 tmp = zip_searchorder[3];
1155 zip_searchorder[3] = zip_searchorder[4];
1156 zip_searchorder[4] = tmp;
1157 }
1158
1159 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1160 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001161 if (mod == NULL)
1162 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001163
1164 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1165 PyExc_ImportError, NULL);
1166 if (ZipImportError == NULL)
1167 return;
1168
1169 Py_INCREF(ZipImportError);
1170 if (PyModule_AddObject(mod, "ZipImportError",
1171 ZipImportError) < 0)
1172 return;
1173
1174 Py_INCREF(&ZipImporter_Type);
1175 if (PyModule_AddObject(mod, "zipimporter",
1176 (PyObject *)&ZipImporter_Type) < 0)
1177 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001178
Just van Rossum52e14d62002-12-30 22:08:05 +00001179 zip_directory_cache = PyDict_New();
1180 if (zip_directory_cache == NULL)
1181 return;
1182 Py_INCREF(zip_directory_cache);
1183 if (PyModule_AddObject(mod, "_zip_directory_cache",
1184 zip_directory_cache) < 0)
1185 return;
1186}