blob: 42cb9d07a97d1185b0b73bcfac13393ed23f6e1b [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags stored in st_zip_searchorder.type. */
#define IS_SOURCE   0x0   /* no flag bits set: a source entry */
#define IS_BYTECODE 0x1   /* entry holds compiled bytecode */
#define IS_PACKAGE  0x2   /* entry is a package __init__ file */
11
/* One row of the module search table: a filename suffix together with
   the IS_* flags describing that kind of entry. */
struct st_zip_searchorder {
    char suffix[14];    /* longest suffix is "/__init__.pyc" plus NUL */
    int type;           /* bitwise OR of IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
95 struct stat statbuf;
96 int rv;
97
98 rv = stat(buf, &statbuf);
99 if (rv == 0) {
100 /* it exists */
101 if (S_ISREG(statbuf.st_mode))
102 /* it's a file */
103 path = buf;
104 break;
105 }
106 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000107 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000108 if (prefix != NULL)
109 *prefix = SEP;
110 if (p == NULL)
111 break;
112 *p = '\0';
113 prefix = p;
114 }
115 if (path != NULL) {
116 PyObject *files;
117 files = PyDict_GetItemString(zip_directory_cache, path);
118 if (files == NULL) {
119 files = read_directory(buf);
120 if (files == NULL)
121 return -1;
122 if (PyDict_SetItemString(zip_directory_cache, path,
123 files) != 0)
124 return -1;
125 }
126 else
127 Py_INCREF(files);
128 self->files = files;
129 }
130 else {
131 PyErr_SetString(ZipImportError, "not a Zip file");
132 return -1;
133 }
134
135 if (prefix == NULL)
136 prefix = "";
137 else {
138 prefix++;
139 len = strlen(prefix);
140 if (prefix[len-1] != SEP) {
141 /* add trailing SEP */
142 prefix[len] = SEP;
143 prefix[len + 1] = '\0';
144 }
145 }
146
147 self->archive = PyString_FromString(buf);
148 if (self->archive == NULL)
149 return -1;
150
151 self->prefix = PyString_FromString(prefix);
152 if (self->prefix == NULL)
153 return -1;
154
155 return 0;
156}
157
158/* GC support. */
159static int
160zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
161{
162 ZipImporter *self = (ZipImporter *)obj;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000163 Py_VISIT(self->files);
Just van Rossum52e14d62002-12-30 22:08:05 +0000164 return 0;
165}
166
167static void
168zipimporter_dealloc(ZipImporter *self)
169{
170 PyObject_GC_UnTrack(self);
171 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000172 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000173 Py_XDECREF(self->files);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000174 Py_Type(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000175}
176
177static PyObject *
178zipimporter_repr(ZipImporter *self)
179{
Just van Rossum52e14d62002-12-30 22:08:05 +0000180 char *archive = "???";
181 char *prefix = "";
182
183 if (self->archive != NULL && PyString_Check(self->archive))
184 archive = PyString_AsString(self->archive);
185 if (self->prefix != NULL && PyString_Check(self->prefix))
186 prefix = PyString_AsString(self->prefix);
187 if (prefix != NULL && *prefix)
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000188 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
189 archive, SEP, prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 else
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000191 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
192 archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000193}
194
/* Return a pointer to the last dotted component of 'fullname', i.e.
   fullname.split(".")[-1].  The result points into 'fullname'. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
206
207/* Given a (sub)modulename, write the potential file path in the
208 archive (without extension) to the path buffer. Return the
209 length of the resulting string. */
210static int
211make_filename(char *prefix, char *name, char *path)
212{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000213 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000214 char *p;
215
216 len = strlen(prefix);
217
218 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
219 if (len + strlen(name) + 13 >= MAXPATHLEN) {
220 PyErr_SetString(ZipImportError, "path too long");
221 return -1;
222 }
223
224 strcpy(path, prefix);
225 strcpy(path + len, name);
226 for (p = path + len; *p; p++) {
227 if (*p == '.')
228 *p = SEP;
229 }
230 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000231 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000232 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000233}
234
/* Result codes returned by get_module_info(). */
enum zi_module_info {
    MI_ERROR = 0,       /* lookup failed; an exception is set */
    MI_NOT_FOUND = 1,   /* no matching entry in the archive */
    MI_MODULE = 2,      /* found a plain module */
    MI_PACKAGE = 3      /* found a package (__init__ entry) */
};
241
242/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000243static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000244get_module_info(ZipImporter *self, char *fullname)
245{
246 char *subname, path[MAXPATHLEN + 1];
247 int len;
248 struct st_zip_searchorder *zso;
249
250 subname = get_subname(fullname);
251
252 len = make_filename(PyString_AsString(self->prefix), subname, path);
253 if (len < 0)
254 return MI_ERROR;
255
256 for (zso = zip_searchorder; *zso->suffix; zso++) {
257 strcpy(path + len, zso->suffix);
258 if (PyDict_GetItemString(self->files, path) != NULL) {
259 if (zso->type & IS_PACKAGE)
260 return MI_PACKAGE;
261 else
262 return MI_MODULE;
263 }
264 }
265 return MI_NOT_FOUND;
266}
267
268/* Check whether we can satisfy the import of the module named by
269 'fullname'. Return self if we can, None if we can't. */
270static PyObject *
271zipimporter_find_module(PyObject *obj, PyObject *args)
272{
273 ZipImporter *self = (ZipImporter *)obj;
274 PyObject *path = NULL;
275 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000276 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000277
278 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
279 &fullname, &path))
280 return NULL;
281
282 mi = get_module_info(self, fullname);
283 if (mi == MI_ERROR)
284 return NULL;
285 if (mi == MI_NOT_FOUND) {
286 Py_INCREF(Py_None);
287 return Py_None;
288 }
289 Py_INCREF(self);
290 return (PyObject *)self;
291}
292
293/* Load and return the module named by 'fullname'. */
294static PyObject *
295zipimporter_load_module(PyObject *obj, PyObject *args)
296{
297 ZipImporter *self = (ZipImporter *)obj;
298 PyObject *code, *mod, *dict;
299 char *fullname, *modpath;
300 int ispackage;
301
302 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
303 &fullname))
304 return NULL;
305
306 code = get_module_code(self, fullname, &ispackage, &modpath);
307 if (code == NULL)
308 return NULL;
309
310 mod = PyImport_AddModule(fullname);
311 if (mod == NULL) {
312 Py_DECREF(code);
313 return NULL;
314 }
315 dict = PyModule_GetDict(mod);
316
317 /* mod.__loader__ = self */
318 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
319 goto error;
320
321 if (ispackage) {
322 /* add __path__ to the module *before* the code gets
323 executed */
324 PyObject *pkgpath, *fullpath;
325 char *prefix = PyString_AsString(self->prefix);
326 char *subname = get_subname(fullname);
327 int err;
328
329 fullpath = PyString_FromFormat("%s%c%s%s",
330 PyString_AsString(self->archive),
331 SEP,
332 *prefix ? prefix : "",
333 subname);
334 if (fullpath == NULL)
335 goto error;
336
337 pkgpath = Py_BuildValue("[O]", fullpath);
338 Py_DECREF(fullpath);
339 if (pkgpath == NULL)
340 goto error;
341 err = PyDict_SetItemString(dict, "__path__", pkgpath);
342 Py_DECREF(pkgpath);
343 if (err != 0)
344 goto error;
345 }
346 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
347 Py_DECREF(code);
348 if (Py_VerboseFlag)
349 PySys_WriteStderr("import %s # loaded from Zip %s\n",
350 fullname, modpath);
351 return mod;
352error:
353 Py_DECREF(code);
354 Py_DECREF(mod);
355 return NULL;
356}
357
358/* Return a bool signifying whether the module is a package or not. */
359static PyObject *
360zipimporter_is_package(PyObject *obj, PyObject *args)
361{
362 ZipImporter *self = (ZipImporter *)obj;
363 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000364 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000365
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000366 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000367 &fullname))
368 return NULL;
369
370 mi = get_module_info(self, fullname);
371 if (mi == MI_ERROR)
372 return NULL;
373 if (mi == MI_NOT_FOUND) {
374 PyErr_Format(ZipImportError, "can't find module '%.200s'",
375 fullname);
376 return NULL;
377 }
378 return PyBool_FromLong(mi == MI_PACKAGE);
379}
380
381static PyObject *
382zipimporter_get_data(PyObject *obj, PyObject *args)
383{
384 ZipImporter *self = (ZipImporter *)obj;
385 char *path;
386#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000387 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000388#endif
389 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000390 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000391
392 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
393 return NULL;
394
395#ifdef ALTSEP
396 if (strlen(path) >= MAXPATHLEN) {
397 PyErr_SetString(ZipImportError, "path too long");
398 return NULL;
399 }
400 strcpy(buf, path);
401 for (p = buf; *p; p++) {
402 if (*p == ALTSEP)
403 *p = SEP;
404 }
405 path = buf;
406#endif
407 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000408 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000409 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
410 path[len] == SEP) {
411 path = path + len + 1;
412 }
413
414 toc_entry = PyDict_GetItemString(self->files, path);
415 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000416 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000417 return NULL;
418 }
419 return get_data(PyString_AsString(self->archive), toc_entry);
420}
421
422static PyObject *
423zipimporter_get_code(PyObject *obj, PyObject *args)
424{
425 ZipImporter *self = (ZipImporter *)obj;
426 char *fullname;
427
428 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
429 return NULL;
430
431 return get_module_code(self, fullname, NULL, NULL);
432}
433
434static PyObject *
435zipimporter_get_source(PyObject *obj, PyObject *args)
436{
437 ZipImporter *self = (ZipImporter *)obj;
438 PyObject *toc_entry;
439 char *fullname, *subname, path[MAXPATHLEN+1];
440 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000441 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000442
443 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
444 return NULL;
445
446 mi = get_module_info(self, fullname);
447 if (mi == MI_ERROR)
448 return NULL;
449 if (mi == MI_NOT_FOUND) {
450 PyErr_Format(ZipImportError, "can't find module '%.200s'",
451 fullname);
452 return NULL;
453 }
454 subname = get_subname(fullname);
455
456 len = make_filename(PyString_AsString(self->prefix), subname, path);
457 if (len < 0)
458 return NULL;
459
460 if (mi == MI_PACKAGE) {
461 path[len] = SEP;
462 strcpy(path + len + 1, "__init__.py");
463 }
464 else
465 strcpy(path + len, ".py");
466
467 toc_entry = PyDict_GetItemString(self->files, path);
Guido van Rossumad8d3002007-08-03 18:40:49 +0000468 if (toc_entry != NULL) {
469 PyObject *bytes = get_data(PyString_AsString(self->archive), toc_entry);
470 PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes));
471 Py_XDECREF(bytes);
472 return res;
473 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000474
475 /* we have the module, but no source */
476 Py_INCREF(Py_None);
477 return Py_None;
478}
479
480PyDoc_STRVAR(doc_find_module,
481"find_module(fullname, path=None) -> self or None.\n\
482\n\
483Search for a module specified by 'fullname'. 'fullname' must be the\n\
484fully qualified (dotted) module name. It returns the zipimporter\n\
485instance itself if the module was found, or None if it wasn't.\n\
486The optional 'path' argument is ignored -- it's there for compatibility\n\
487with the importer protocol.");
488
489PyDoc_STRVAR(doc_load_module,
490"load_module(fullname) -> module.\n\
491\n\
492Load the module specified by 'fullname'. 'fullname' must be the\n\
493fully qualified (dotted) module name. It returns the imported\n\
494module, or raises ZipImportError if it wasn't found.");
495
496PyDoc_STRVAR(doc_get_data,
497"get_data(pathname) -> string with file data.\n\
498\n\
499Return the data associated with 'pathname'. Raise IOError if\n\
500the file wasn't found.");
501
502PyDoc_STRVAR(doc_is_package,
503"is_package(fullname) -> bool.\n\
504\n\
505Return True if the module specified by fullname is a package.\n\
506Raise ZipImportError is the module couldn't be found.");
507
508PyDoc_STRVAR(doc_get_code,
509"get_code(fullname) -> code object.\n\
510\n\
511Return the code object for the specified module. Raise ZipImportError\n\
512is the module couldn't be found.");
513
514PyDoc_STRVAR(doc_get_source,
515"get_source(fullname) -> source string.\n\
516\n\
517Return the source code for the specified module. Raise ZipImportError\n\
518is the module couldn't be found, return None if the archive does\n\
519contain the module, but has no source for it.");
520
521static PyMethodDef zipimporter_methods[] = {
522 {"find_module", zipimporter_find_module, METH_VARARGS,
523 doc_find_module},
524 {"load_module", zipimporter_load_module, METH_VARARGS,
525 doc_load_module},
526 {"get_data", zipimporter_get_data, METH_VARARGS,
527 doc_get_data},
528 {"get_code", zipimporter_get_code, METH_VARARGS,
529 doc_get_code},
530 {"get_source", zipimporter_get_source, METH_VARARGS,
531 doc_get_source},
532 {"is_package", zipimporter_is_package, METH_VARARGS,
533 doc_is_package},
534 {NULL, NULL} /* sentinel */
535};
536
537static PyMemberDef zipimporter_members[] = {
538 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
539 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
540 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
541 {NULL}
542};
543
544PyDoc_STRVAR(zipimporter_doc,
545"zipimporter(archivepath) -> zipimporter object\n\
546\n\
547Create a new zipimporter instance. 'archivepath' must be a path to\n\
548a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
549a valid Zip archive.");
550
551#define DEFERRED_ADDRESS(ADDR) 0
552
553static PyTypeObject ZipImporter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000554 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
Just van Rossum52e14d62002-12-30 22:08:05 +0000555 "zipimport.zipimporter",
556 sizeof(ZipImporter),
557 0, /* tp_itemsize */
558 (destructor)zipimporter_dealloc, /* tp_dealloc */
559 0, /* tp_print */
560 0, /* tp_getattr */
561 0, /* tp_setattr */
562 0, /* tp_compare */
563 (reprfunc)zipimporter_repr, /* tp_repr */
564 0, /* tp_as_number */
565 0, /* tp_as_sequence */
566 0, /* tp_as_mapping */
567 0, /* tp_hash */
568 0, /* tp_call */
569 0, /* tp_str */
570 PyObject_GenericGetAttr, /* tp_getattro */
571 0, /* tp_setattro */
572 0, /* tp_as_buffer */
573 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
574 Py_TPFLAGS_HAVE_GC, /* tp_flags */
575 zipimporter_doc, /* tp_doc */
576 zipimporter_traverse, /* tp_traverse */
577 0, /* tp_clear */
578 0, /* tp_richcompare */
579 0, /* tp_weaklistoffset */
580 0, /* tp_iter */
581 0, /* tp_iternext */
582 zipimporter_methods, /* tp_methods */
583 zipimporter_members, /* tp_members */
584 0, /* tp_getset */
585 0, /* tp_base */
586 0, /* tp_dict */
587 0, /* tp_descr_get */
588 0, /* tp_descr_set */
589 0, /* tp_dictoffset */
590 (initproc)zipimporter_init, /* tp_init */
591 PyType_GenericAlloc, /* tp_alloc */
592 PyType_GenericNew, /* tp_new */
593 PyObject_GC_Del, /* tp_free */
594};
595
596
597/* implementation */
598
/* Decode the first 4 bytes of 'buf' as a little-endian 32-bit value
   and return it as a long.  Where long is wider than 4 bytes the
   result is sign-extended from bit 31.  This partially reimplements
   marshal.c:r_long(). */
static long
get_long(unsigned char *buf) {
    long value = (long)buf[0]
               | ((long)buf[1] << 8)
               | ((long)buf[2] << 16)
               | ((long)buf[3] << 24);
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    if (value & 0x80000000L)
        value |= -0x80000000L;
#endif
    return value;
}
615
616/*
617 read_directory(archive) -> files dict (new reference)
618
619 Given a path to a Zip archive, build a dict, mapping file names
620 (local to the archive, using SEP as a separator) to toc entries.
621
622 A toc_entry is a tuple:
623
Fred Drakef5b7fd22005-11-11 19:34:56 +0000624 (__file__, # value to use for __file__, available for all files
625 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000626 data_size, # size of compressed data on disk
627 file_size, # size of decompressed data
628 file_offset, # offset of file header from start of archive
629 time, # mod time of file (in dos format)
630 date, # mod data of file (in dos format)
631 crc, # crc checksum of the data
632 )
633
634 Directories can be recognized by the trailing SEP in the name,
635 data_size and file_offset are 0.
636*/
637static PyObject *
638read_directory(char *archive)
639{
640 PyObject *files = NULL;
641 FILE *fp;
642 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000643 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000644 long i, l, count;
645 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000646 char path[MAXPATHLEN + 5];
647 char name[MAXPATHLEN + 5];
648 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000649 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000650
651 if (strlen(archive) > MAXPATHLEN) {
652 PyErr_SetString(PyExc_OverflowError,
653 "Zip path name is too long");
654 return NULL;
655 }
656 strcpy(path, archive);
657
658 fp = fopen(archive, "rb");
659 if (fp == NULL) {
660 PyErr_Format(ZipImportError, "can't open Zip file: "
661 "'%.200s'", archive);
662 return NULL;
663 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000664 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000665 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000666 if (fread(endof_central_dir, 1, 22, fp) != 22) {
667 fclose(fp);
668 PyErr_Format(ZipImportError, "can't read Zip file: "
669 "'%.200s'", archive);
670 return NULL;
671 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000672 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000673 /* Bad: End of Central Dir signature */
674 fclose(fp);
675 PyErr_Format(ZipImportError, "not a Zip file: "
676 "'%.200s'", archive);
677 return NULL;
678 }
679
Thomas Heller354e3d92003-07-22 18:10:15 +0000680 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000681 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000682 arc_offset = header_position - header_offset - header_size;
683 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000684
685 files = PyDict_New();
686 if (files == NULL)
687 goto error;
688
689 length = (long)strlen(path);
690 path[length] = SEP;
691
692 /* Start of Central Directory */
693 count = 0;
694 for (;;) {
695 PyObject *t;
696 int err;
697
698 fseek(fp, header_offset, 0); /* Start of file header */
699 l = PyMarshal_ReadLongFromFile(fp);
700 if (l != 0x02014B50)
701 break; /* Bad: Central Dir File Header */
702 fseek(fp, header_offset + 10, 0);
703 compress = PyMarshal_ReadShortFromFile(fp);
704 time = PyMarshal_ReadShortFromFile(fp);
705 date = PyMarshal_ReadShortFromFile(fp);
706 crc = PyMarshal_ReadLongFromFile(fp);
707 data_size = PyMarshal_ReadLongFromFile(fp);
708 file_size = PyMarshal_ReadLongFromFile(fp);
709 name_size = PyMarshal_ReadShortFromFile(fp);
710 header_size = 46 + name_size +
711 PyMarshal_ReadShortFromFile(fp) +
712 PyMarshal_ReadShortFromFile(fp);
713 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000714 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000715 if (name_size > MAXPATHLEN)
716 name_size = MAXPATHLEN;
717
718 p = name;
719 for (i = 0; i < name_size; i++) {
720 *p = (char)getc(fp);
721 if (*p == '/')
722 *p = SEP;
723 p++;
724 }
725 *p = 0; /* Add terminating null byte */
726 header_offset += header_size;
727
728 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
729
730 t = Py_BuildValue("siiiiiii", path, compress, data_size,
731 file_size, file_offset, time, date, crc);
732 if (t == NULL)
733 goto error;
734 err = PyDict_SetItemString(files, name, t);
735 Py_DECREF(t);
736 if (err != 0)
737 goto error;
738 count++;
739 }
740 fclose(fp);
741 if (Py_VerboseFlag)
742 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
743 count, archive);
744 return files;
745error:
746 fclose(fp);
747 Py_XDECREF(files);
748 return NULL;
749}
750
751/* Return the zlib.decompress function object, or NULL if zlib couldn't
752 be imported. The function is cached when found, so subsequent calls
753 don't import zlib again. Returns a *borrowed* reference.
754 XXX This makes zlib.decompress immortal. */
755static PyObject *
756get_decompress_func(void)
757{
758 static PyObject *decompress = NULL;
759
760 if (decompress == NULL) {
761 PyObject *zlib;
762 static int importing_zlib = 0;
763
764 if (importing_zlib != 0)
765 /* Someone has a zlib.py[co] in their Zip file;
766 let's avoid a stack overflow. */
767 return NULL;
768 importing_zlib = 1;
769 zlib = PyImport_ImportModule("zlib"); /* import zlib */
770 importing_zlib = 0;
771 if (zlib != NULL) {
772 decompress = PyObject_GetAttrString(zlib,
773 "decompress");
774 Py_DECREF(zlib);
775 }
776 else
777 PyErr_Clear();
778 if (Py_VerboseFlag)
779 PySys_WriteStderr("# zipimport: zlib %s\n",
780 zlib != NULL ? "available": "UNAVAILABLE");
781 }
782 return decompress;
783}
784
785/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
786 data as a new reference. */
787static PyObject *
788get_data(char *archive, PyObject *toc_entry)
789{
790 PyObject *raw_data, *data = NULL, *decompress;
791 char *buf;
792 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000793 int err;
794 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000795 long l;
796 char *datapath;
797 long compress, data_size, file_size, file_offset;
798 long time, date, crc;
799
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000800 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000801 &data_size, &file_size, &file_offset, &time,
802 &date, &crc)) {
803 return NULL;
804 }
805
806 fp = fopen(archive, "rb");
807 if (!fp) {
808 PyErr_Format(PyExc_IOError,
809 "zipimport: can not open file %s", archive);
810 return NULL;
811 }
812
813 /* Check to make sure the local file header is correct */
814 fseek(fp, file_offset, 0);
815 l = PyMarshal_ReadLongFromFile(fp);
816 if (l != 0x04034B50) {
817 /* Bad: Local File Header */
818 PyErr_Format(ZipImportError,
819 "bad local file header in %s",
820 archive);
821 fclose(fp);
822 return NULL;
823 }
824 fseek(fp, file_offset + 26, 0);
825 l = 30 + PyMarshal_ReadShortFromFile(fp) +
826 PyMarshal_ReadShortFromFile(fp); /* local header size */
827 file_offset += l; /* Start of file data */
828
829 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
830 data_size : data_size + 1);
831 if (raw_data == NULL) {
832 fclose(fp);
833 return NULL;
834 }
835 buf = PyString_AsString(raw_data);
836
837 err = fseek(fp, file_offset, 0);
838 if (err == 0)
839 bytes_read = fread(buf, 1, data_size, fp);
840 fclose(fp);
841 if (err || bytes_read != data_size) {
842 PyErr_SetString(PyExc_IOError,
843 "zipimport: can't read data");
844 Py_DECREF(raw_data);
845 return NULL;
846 }
847
848 if (compress != 0) {
849 buf[data_size] = 'Z'; /* saw this in zipfile.py */
850 data_size++;
851 }
852 buf[data_size] = '\0';
853
Guido van Rossumad8d3002007-08-03 18:40:49 +0000854 if (compress == 0) { /* data is not compressed */
855 raw_data = PyBytes_FromStringAndSize(buf, data_size);
Just van Rossum52e14d62002-12-30 22:08:05 +0000856 return raw_data;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000857 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000858
859 /* Decompress with zlib */
860 decompress = get_decompress_func();
861 if (decompress == NULL) {
862 PyErr_SetString(ZipImportError,
863 "can't decompress data; "
864 "zlib not available");
865 goto error;
866 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000867 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000868error:
869 Py_DECREF(raw_data);
870 return data;
871}
872
/* Lenient time stamp comparison: return non-zero when t1 and t2 are
   at most one second apart.  The mtime kept in the archive is less
   precise than the one stored in a .pyc (dostime only stores even
   seconds), hence the tolerance. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
885
886/* Given the contents of a .py[co] file in a buffer, unmarshal the data
887 and return the code object. Return None if it the magic word doesn't
888 match (we do this instead of raising an exception as we fall back
889 to .py if available and we don't want to mask other errors).
890 Returns a new reference. */
891static PyObject *
892unmarshal_code(char *pathname, PyObject *data, time_t mtime)
893{
894 PyObject *code;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000895 char *buf = PyBytes_AsString(data);
896 Py_ssize_t size = PyBytes_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000897
898 if (size <= 9) {
899 PyErr_SetString(ZipImportError,
900 "bad pyc data");
901 return NULL;
902 }
903
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000904 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000905 if (Py_VerboseFlag)
906 PySys_WriteStderr("# %s has bad magic\n",
907 pathname);
908 Py_INCREF(Py_None);
909 return Py_None; /* signal caller to try alternative */
910 }
911
Just van Rossum9a3129c2003-01-03 11:18:56 +0000912 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
913 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000914 if (Py_VerboseFlag)
915 PySys_WriteStderr("# %s has bad mtime\n",
916 pathname);
917 Py_INCREF(Py_None);
918 return Py_None; /* signal caller to try alternative */
919 }
920
921 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
922 if (code == NULL)
923 return NULL;
924 if (!PyCode_Check(code)) {
925 Py_DECREF(code);
926 PyErr_Format(PyExc_TypeError,
927 "compiled module %.200s is not a code object",
928 pathname);
929 return NULL;
930 }
931 return code;
932}
933
934/* Replace any occurances of "\r\n?" in the input string with "\n".
935 This converts DOS and Mac line endings to Unix line endings.
936 Also append a trailing "\n" to be compatible with
937 PyParser_SimpleParseFile(). Returns a new reference. */
938static PyObject *
939normalize_line_endings(PyObject *source)
940{
Guido van Rossumad8d3002007-08-03 18:40:49 +0000941 char *buf, *q, *p = PyBytes_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000942 PyObject *fixed_source;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000943 int len = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000944
Guido van Rossumad8d3002007-08-03 18:40:49 +0000945 if (!p) {
946 return PyBytes_FromStringAndSize("\n\0", 2);
947 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000948
Just van Rossum9a3129c2003-01-03 11:18:56 +0000949 /* one char extra for trailing \n and one for terminating \0 */
Guido van Rossumad8d3002007-08-03 18:40:49 +0000950 buf = (char *)PyMem_Malloc(PyBytes_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000951 if (buf == NULL) {
952 PyErr_SetString(PyExc_MemoryError,
953 "zipimport: no memory to allocate "
954 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000955 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000956 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000957 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000958 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000959 if (*p == '\r') {
960 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000961 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000962 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000963 }
964 else
965 *q++ = *p;
Guido van Rossumad8d3002007-08-03 18:40:49 +0000966 len++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000967 }
968 *q++ = '\n'; /* add trailing \n */
969 *q = '\0';
Guido van Rossumad8d3002007-08-03 18:40:49 +0000970 fixed_source = PyBytes_FromStringAndSize(buf, len + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000972 return fixed_source;
973}
974
975/* Given a string buffer containing Python source code, compile it
976 return and return a code object as a new reference. */
977static PyObject *
978compile_source(char *pathname, PyObject *source)
979{
980 PyObject *code, *fixed_source;
981
982 fixed_source = normalize_line_endings(source);
983 if (fixed_source == NULL)
984 return NULL;
985
Guido van Rossumad8d3002007-08-03 18:40:49 +0000986 code = Py_CompileString(PyBytes_AsString(fixed_source), pathname,
Just van Rossum52e14d62002-12-30 22:08:05 +0000987 Py_file_input);
988 Py_DECREF(fixed_source);
989 return code;
990}
991
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' packs seconds/2 in bits 0-4, minutes in bits 5-10 and
   hours in bits 11-15.  'dosdate' packs the day of the month in bits
   0-4, the month (1-12) in bits 5-8 and the number of years since 1980
   in bits 9-15.  The fields are interpreted by mktime(), i.e. as local
   time; its result is returned unchanged (including (time_t)-1 when
   the time cannot be represented). */
static time_t
parse_dostime(int dostime, int dosdate)
{
	/* Start from an all-zero struct so that tm_wday, tm_yday and any
	   platform-specific members are not handed to mktime() with
	   indeterminate values. */
	struct tm stm = {0};

	stm.tm_sec   =  (dostime & 0x1f) * 2;
	stm.tm_min   =  (dostime >> 5) & 0x3f;
	stm.tm_hour  =  (dostime >> 11) & 0x1f;
	stm.tm_mday  =   dosdate & 0x1f;
	stm.tm_mon   = ((dosdate >> 5) & 0x0f) - 1;   /* tm_mon is 0-based */
	stm.tm_year  = ((dosdate >> 9) & 0x7f) + 80;  /* tm_year counts from 1900 */
	stm.tm_isdst =   -1; /* let mktime() decide; wday/yday are ignored */

	return mktime(&stm);
}
1009
1010/* Given a path to a .pyc or .pyo file in the archive, return the
1011 modifictaion time of the matching .py file, or 0 if no source
1012 is available. */
1013static time_t
1014get_mtime_of_source(ZipImporter *self, char *path)
1015{
1016 PyObject *toc_entry;
1017 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001018 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001019 char savechar = path[lastchar];
1020 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1021 toc_entry = PyDict_GetItemString(self->files, path);
1022 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1023 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001024 /* fetch the time stamp of the .py file for comparison
1025 with an embedded pyc time stamp */
1026 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001027 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1028 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1029 mtime = parse_dostime(time, date);
1030 }
1031 path[lastchar] = savechar;
1032 return mtime;
1033}
1034
1035/* Return the code object for the module named by 'fullname' from the
1036 Zip archive as a new reference. */
1037static PyObject *
1038get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1039 time_t mtime, PyObject *toc_entry)
1040{
1041 PyObject *data, *code;
1042 char *modpath;
1043 char *archive = PyString_AsString(self->archive);
1044
1045 if (archive == NULL)
1046 return NULL;
1047
1048 data = get_data(archive, toc_entry);
1049 if (data == NULL)
1050 return NULL;
1051
1052 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1053
1054 if (isbytecode) {
1055 code = unmarshal_code(modpath, data, mtime);
1056 }
1057 else {
1058 code = compile_source(modpath, data);
1059 }
1060 Py_DECREF(data);
1061 return code;
1062}
1063
1064/* Get the code object assoiciated with the module specified by
1065 'fullname'. */
1066static PyObject *
1067get_module_code(ZipImporter *self, char *fullname,
1068 int *p_ispackage, char **p_modpath)
1069{
1070 PyObject *toc_entry;
1071 char *subname, path[MAXPATHLEN + 1];
1072 int len;
1073 struct st_zip_searchorder *zso;
1074
1075 subname = get_subname(fullname);
1076
1077 len = make_filename(PyString_AsString(self->prefix), subname, path);
1078 if (len < 0)
1079 return NULL;
1080
1081 for (zso = zip_searchorder; *zso->suffix; zso++) {
1082 PyObject *code = NULL;
1083
1084 strcpy(path + len, zso->suffix);
1085 if (Py_VerboseFlag > 1)
1086 PySys_WriteStderr("# trying %s%c%s\n",
1087 PyString_AsString(self->archive),
1088 SEP, path);
1089 toc_entry = PyDict_GetItemString(self->files, path);
1090 if (toc_entry != NULL) {
1091 time_t mtime = 0;
1092 int ispackage = zso->type & IS_PACKAGE;
1093 int isbytecode = zso->type & IS_BYTECODE;
1094
1095 if (isbytecode)
1096 mtime = get_mtime_of_source(self, path);
1097 if (p_ispackage != NULL)
1098 *p_ispackage = ispackage;
1099 code = get_code_from_data(self, ispackage,
1100 isbytecode, mtime,
1101 toc_entry);
1102 if (code == Py_None) {
1103 /* bad magic number or non-matching mtime
1104 in byte code, try next */
1105 Py_DECREF(code);
1106 continue;
1107 }
1108 if (code != NULL && p_modpath != NULL)
1109 *p_modpath = PyString_AsString(
1110 PyTuple_GetItem(toc_entry, 0));
1111 return code;
1112 }
1113 }
1114 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1115 return NULL;
1116}
1117
1118
1119/* Module init */
1120
1121PyDoc_STRVAR(zipimport_doc,
1122"zipimport provides support for importing Python modules from Zip archives.\n\
1123\n\
1124This module exports three objects:\n\
1125- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001126- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001127 subclass of ImportError, so it can be caught as ImportError, too.\n\
1128- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1129 info dicts, as used in zipimporter._files.\n\
1130\n\
1131It is usually not needed to use the zipimport module explicitly; it is\n\
1132used by the builtin import mechanism for sys.path items that are paths\n\
1133to Zip archives.");
1134
1135PyMODINIT_FUNC
1136initzipimport(void)
1137{
1138 PyObject *mod;
1139
1140 if (PyType_Ready(&ZipImporter_Type) < 0)
1141 return;
1142
1143 /* Correct directory separator */
1144 zip_searchorder[0].suffix[0] = SEP;
1145 zip_searchorder[1].suffix[0] = SEP;
1146 zip_searchorder[2].suffix[0] = SEP;
1147 if (Py_OptimizeFlag) {
1148 /* Reverse *.pyc and *.pyo */
1149 struct st_zip_searchorder tmp;
1150 tmp = zip_searchorder[0];
1151 zip_searchorder[0] = zip_searchorder[1];
1152 zip_searchorder[1] = tmp;
1153 tmp = zip_searchorder[3];
1154 zip_searchorder[3] = zip_searchorder[4];
1155 zip_searchorder[4] = tmp;
1156 }
1157
1158 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1159 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001160 if (mod == NULL)
1161 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001162
1163 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1164 PyExc_ImportError, NULL);
1165 if (ZipImportError == NULL)
1166 return;
1167
1168 Py_INCREF(ZipImportError);
1169 if (PyModule_AddObject(mod, "ZipImportError",
1170 ZipImportError) < 0)
1171 return;
1172
1173 Py_INCREF(&ZipImporter_Type);
1174 if (PyModule_AddObject(mod, "zipimporter",
1175 (PyObject *)&ZipImporter_Type) < 0)
1176 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001177
Just van Rossum52e14d62002-12-30 22:08:05 +00001178 zip_directory_cache = PyDict_New();
1179 if (zip_directory_cache == NULL)
1180 return;
1181 Py_INCREF(zip_directory_cache);
1182 if (PyModule_AddObject(mod, "_zip_directory_cache",
1183 zip_directory_cache) < 0)
1184 return;
1185}