blob: bf913f3d7fbbf51d4b8f021faae53bef1bcb2a43 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
/* Bit flags describing a zip_searchorder entry.  IS_SOURCE is the
   absence of IS_BYTECODE; IS_PACKAGE may be OR-ed with either. */
#define IS_SOURCE   0x0
#define IS_BYTECODE 0x1
#define IS_PACKAGE  0x2
11
/* One candidate file-name suffix together with the IS_* flags that
   describe what a match on that suffix means. */
struct st_zip_searchorder {
    char suffix[14];    /* large enough for "/__init__.pyc" plus NUL */
    int type;           /* combination of IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000095#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000096 struct stat statbuf;
97 int rv;
98
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
106 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000107#else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
114 }
115#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000116 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000117 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
124 }
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
135 }
136 else
137 Py_INCREF(files);
138 self->files = files;
139 }
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
143 }
144
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
154 }
155 }
156
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
160
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
164
165 return 0;
166}
167
168/* GC support. */
169static int
170zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171{
172 ZipImporter *self = (ZipImporter *)obj;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000173 Py_VISIT(self->files);
Just van Rossum52e14d62002-12-30 22:08:05 +0000174 return 0;
175}
176
177static void
178zipimporter_dealloc(ZipImporter *self)
179{
180 PyObject_GC_UnTrack(self);
181 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000182 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000183 Py_XDECREF(self->files);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000184 Py_Type(self)->tp_free((PyObject *)self);
Just van Rossum52e14d62002-12-30 22:08:05 +0000185}
186
187static PyObject *
188zipimporter_repr(ZipImporter *self)
189{
Just van Rossum52e14d62002-12-30 22:08:05 +0000190 char *archive = "???";
191 char *prefix = "";
192
193 if (self->archive != NULL && PyString_Check(self->archive))
194 archive = PyString_AsString(self->archive);
195 if (self->prefix != NULL && PyString_Check(self->prefix))
196 prefix = PyString_AsString(self->prefix);
197 if (prefix != NULL && *prefix)
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000198 return PyUnicode_FromFormat("<zipimporter object \"%.300s%c%.150s\">",
199 archive, SEP, prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000200 else
Walter Dörwald5b0443c2007-06-05 16:19:33 +0000201 return PyUnicode_FromFormat("<zipimporter object \"%.300s\">",
202 archive);
Just van Rossum52e14d62002-12-30 22:08:05 +0000203}
204
/* Return a pointer to the last dotted component of 'fullname', i.e.
   the equivalent of fullname.split(".")[-1].  The result points into
   'fullname'; nothing is allocated. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
216
217/* Given a (sub)modulename, write the potential file path in the
218 archive (without extension) to the path buffer. Return the
219 length of the resulting string. */
220static int
221make_filename(char *prefix, char *name, char *path)
222{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000223 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000224 char *p;
225
226 len = strlen(prefix);
227
228 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
229 if (len + strlen(name) + 13 >= MAXPATHLEN) {
230 PyErr_SetString(ZipImportError, "path too long");
231 return -1;
232 }
233
234 strcpy(path, prefix);
235 strcpy(path + len, name);
236 for (p = path + len; *p; p++) {
237 if (*p == '.')
238 *p = SEP;
239 }
240 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000242 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000243}
244
/* Result of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* an exception has been set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found, plain module */
    MI_PACKAGE      /* found, package (__init__ entry) */
};
251
252/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000253static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000254get_module_info(ZipImporter *self, char *fullname)
255{
256 char *subname, path[MAXPATHLEN + 1];
257 int len;
258 struct st_zip_searchorder *zso;
259
260 subname = get_subname(fullname);
261
262 len = make_filename(PyString_AsString(self->prefix), subname, path);
263 if (len < 0)
264 return MI_ERROR;
265
266 for (zso = zip_searchorder; *zso->suffix; zso++) {
267 strcpy(path + len, zso->suffix);
268 if (PyDict_GetItemString(self->files, path) != NULL) {
269 if (zso->type & IS_PACKAGE)
270 return MI_PACKAGE;
271 else
272 return MI_MODULE;
273 }
274 }
275 return MI_NOT_FOUND;
276}
277
278/* Check whether we can satisfy the import of the module named by
279 'fullname'. Return self if we can, None if we can't. */
280static PyObject *
281zipimporter_find_module(PyObject *obj, PyObject *args)
282{
283 ZipImporter *self = (ZipImporter *)obj;
284 PyObject *path = NULL;
285 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000286 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000287
288 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
289 &fullname, &path))
290 return NULL;
291
292 mi = get_module_info(self, fullname);
293 if (mi == MI_ERROR)
294 return NULL;
295 if (mi == MI_NOT_FOUND) {
296 Py_INCREF(Py_None);
297 return Py_None;
298 }
299 Py_INCREF(self);
300 return (PyObject *)self;
301}
302
303/* Load and return the module named by 'fullname'. */
304static PyObject *
305zipimporter_load_module(PyObject *obj, PyObject *args)
306{
307 ZipImporter *self = (ZipImporter *)obj;
308 PyObject *code, *mod, *dict;
309 char *fullname, *modpath;
310 int ispackage;
311
312 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
313 &fullname))
314 return NULL;
315
316 code = get_module_code(self, fullname, &ispackage, &modpath);
317 if (code == NULL)
318 return NULL;
319
320 mod = PyImport_AddModule(fullname);
321 if (mod == NULL) {
322 Py_DECREF(code);
323 return NULL;
324 }
325 dict = PyModule_GetDict(mod);
326
327 /* mod.__loader__ = self */
328 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
329 goto error;
330
331 if (ispackage) {
332 /* add __path__ to the module *before* the code gets
333 executed */
334 PyObject *pkgpath, *fullpath;
335 char *prefix = PyString_AsString(self->prefix);
336 char *subname = get_subname(fullname);
337 int err;
338
339 fullpath = PyString_FromFormat("%s%c%s%s",
340 PyString_AsString(self->archive),
341 SEP,
342 *prefix ? prefix : "",
343 subname);
344 if (fullpath == NULL)
345 goto error;
346
347 pkgpath = Py_BuildValue("[O]", fullpath);
348 Py_DECREF(fullpath);
349 if (pkgpath == NULL)
350 goto error;
351 err = PyDict_SetItemString(dict, "__path__", pkgpath);
352 Py_DECREF(pkgpath);
353 if (err != 0)
354 goto error;
355 }
356 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
357 Py_DECREF(code);
358 if (Py_VerboseFlag)
359 PySys_WriteStderr("import %s # loaded from Zip %s\n",
360 fullname, modpath);
361 return mod;
362error:
363 Py_DECREF(code);
364 Py_DECREF(mod);
365 return NULL;
366}
367
368/* Return a bool signifying whether the module is a package or not. */
369static PyObject *
370zipimporter_is_package(PyObject *obj, PyObject *args)
371{
372 ZipImporter *self = (ZipImporter *)obj;
373 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000374 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000375
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000376 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000377 &fullname))
378 return NULL;
379
380 mi = get_module_info(self, fullname);
381 if (mi == MI_ERROR)
382 return NULL;
383 if (mi == MI_NOT_FOUND) {
384 PyErr_Format(ZipImportError, "can't find module '%.200s'",
385 fullname);
386 return NULL;
387 }
388 return PyBool_FromLong(mi == MI_PACKAGE);
389}
390
391static PyObject *
392zipimporter_get_data(PyObject *obj, PyObject *args)
393{
394 ZipImporter *self = (ZipImporter *)obj;
395 char *path;
396#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000397 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000398#endif
399 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000400 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000401
402 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
403 return NULL;
404
405#ifdef ALTSEP
406 if (strlen(path) >= MAXPATHLEN) {
407 PyErr_SetString(ZipImportError, "path too long");
408 return NULL;
409 }
410 strcpy(buf, path);
411 for (p = buf; *p; p++) {
412 if (*p == ALTSEP)
413 *p = SEP;
414 }
415 path = buf;
416#endif
417 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000418 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000419 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
420 path[len] == SEP) {
421 path = path + len + 1;
422 }
423
424 toc_entry = PyDict_GetItemString(self->files, path);
425 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000426 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000427 return NULL;
428 }
429 return get_data(PyString_AsString(self->archive), toc_entry);
430}
431
432static PyObject *
433zipimporter_get_code(PyObject *obj, PyObject *args)
434{
435 ZipImporter *self = (ZipImporter *)obj;
436 char *fullname;
437
438 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
439 return NULL;
440
441 return get_module_code(self, fullname, NULL, NULL);
442}
443
444static PyObject *
445zipimporter_get_source(PyObject *obj, PyObject *args)
446{
447 ZipImporter *self = (ZipImporter *)obj;
448 PyObject *toc_entry;
449 char *fullname, *subname, path[MAXPATHLEN+1];
450 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000451 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000452
453 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
454 return NULL;
455
456 mi = get_module_info(self, fullname);
457 if (mi == MI_ERROR)
458 return NULL;
459 if (mi == MI_NOT_FOUND) {
460 PyErr_Format(ZipImportError, "can't find module '%.200s'",
461 fullname);
462 return NULL;
463 }
464 subname = get_subname(fullname);
465
466 len = make_filename(PyString_AsString(self->prefix), subname, path);
467 if (len < 0)
468 return NULL;
469
470 if (mi == MI_PACKAGE) {
471 path[len] = SEP;
472 strcpy(path + len + 1, "__init__.py");
473 }
474 else
475 strcpy(path + len, ".py");
476
477 toc_entry = PyDict_GetItemString(self->files, path);
478 if (toc_entry != NULL)
479 return get_data(PyString_AsString(self->archive), toc_entry);
480
481 /* we have the module, but no source */
482 Py_INCREF(Py_None);
483 return Py_None;
484}
485
486PyDoc_STRVAR(doc_find_module,
487"find_module(fullname, path=None) -> self or None.\n\
488\n\
489Search for a module specified by 'fullname'. 'fullname' must be the\n\
490fully qualified (dotted) module name. It returns the zipimporter\n\
491instance itself if the module was found, or None if it wasn't.\n\
492The optional 'path' argument is ignored -- it's there for compatibility\n\
493with the importer protocol.");
494
495PyDoc_STRVAR(doc_load_module,
496"load_module(fullname) -> module.\n\
497\n\
498Load the module specified by 'fullname'. 'fullname' must be the\n\
499fully qualified (dotted) module name. It returns the imported\n\
500module, or raises ZipImportError if it wasn't found.");
501
502PyDoc_STRVAR(doc_get_data,
503"get_data(pathname) -> string with file data.\n\
504\n\
505Return the data associated with 'pathname'. Raise IOError if\n\
506the file wasn't found.");
507
508PyDoc_STRVAR(doc_is_package,
509"is_package(fullname) -> bool.\n\
510\n\
511Return True if the module specified by fullname is a package.\n\
512Raise ZipImportError is the module couldn't be found.");
513
514PyDoc_STRVAR(doc_get_code,
515"get_code(fullname) -> code object.\n\
516\n\
517Return the code object for the specified module. Raise ZipImportError\n\
518is the module couldn't be found.");
519
520PyDoc_STRVAR(doc_get_source,
521"get_source(fullname) -> source string.\n\
522\n\
523Return the source code for the specified module. Raise ZipImportError\n\
524is the module couldn't be found, return None if the archive does\n\
525contain the module, but has no source for it.");
526
527static PyMethodDef zipimporter_methods[] = {
528 {"find_module", zipimporter_find_module, METH_VARARGS,
529 doc_find_module},
530 {"load_module", zipimporter_load_module, METH_VARARGS,
531 doc_load_module},
532 {"get_data", zipimporter_get_data, METH_VARARGS,
533 doc_get_data},
534 {"get_code", zipimporter_get_code, METH_VARARGS,
535 doc_get_code},
536 {"get_source", zipimporter_get_source, METH_VARARGS,
537 doc_get_source},
538 {"is_package", zipimporter_is_package, METH_VARARGS,
539 doc_is_package},
540 {NULL, NULL} /* sentinel */
541};
542
543static PyMemberDef zipimporter_members[] = {
544 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
545 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
546 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
547 {NULL}
548};
549
550PyDoc_STRVAR(zipimporter_doc,
551"zipimporter(archivepath) -> zipimporter object\n\
552\n\
553Create a new zipimporter instance. 'archivepath' must be a path to\n\
554a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
555a valid Zip archive.");
556
557#define DEFERRED_ADDRESS(ADDR) 0
558
559static PyTypeObject ZipImporter_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000560 PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
Just van Rossum52e14d62002-12-30 22:08:05 +0000561 "zipimport.zipimporter",
562 sizeof(ZipImporter),
563 0, /* tp_itemsize */
564 (destructor)zipimporter_dealloc, /* tp_dealloc */
565 0, /* tp_print */
566 0, /* tp_getattr */
567 0, /* tp_setattr */
568 0, /* tp_compare */
569 (reprfunc)zipimporter_repr, /* tp_repr */
570 0, /* tp_as_number */
571 0, /* tp_as_sequence */
572 0, /* tp_as_mapping */
573 0, /* tp_hash */
574 0, /* tp_call */
575 0, /* tp_str */
576 PyObject_GenericGetAttr, /* tp_getattro */
577 0, /* tp_setattro */
578 0, /* tp_as_buffer */
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
580 Py_TPFLAGS_HAVE_GC, /* tp_flags */
581 zipimporter_doc, /* tp_doc */
582 zipimporter_traverse, /* tp_traverse */
583 0, /* tp_clear */
584 0, /* tp_richcompare */
585 0, /* tp_weaklistoffset */
586 0, /* tp_iter */
587 0, /* tp_iternext */
588 zipimporter_methods, /* tp_methods */
589 zipimporter_members, /* tp_members */
590 0, /* tp_getset */
591 0, /* tp_base */
592 0, /* tp_dict */
593 0, /* tp_descr_get */
594 0, /* tp_descr_set */
595 0, /* tp_dictoffset */
596 (initproc)zipimporter_init, /* tp_init */
597 PyType_GenericAlloc, /* tp_alloc */
598 PyType_GenericNew, /* tp_new */
599 PyObject_GC_Del, /* tp_free */
600};
601
602
603/* implementation */
604
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian and interpreted as a signed
   32-bit value (so 0xFFFFFFFF yields -1 regardless of the width of
   long).  This partially reimplements marshal.c:r_long().

   The value is assembled in unsigned arithmetic so that no signed
   shift can overflow, and the sign is applied arithmetically so the
   result does not depend on the SIZEOF_LONG configuration macro. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0];
    bits |= (unsigned long)buf[1] << 8;
    bits |= (unsigned long)buf[2] << 16;
    bits |= (unsigned long)buf[3] << 24;

    if (bits & 0x80000000UL) {
        /* Two's complement: value == -((~bits & 0x7FFFFFFF) + 1).
           Written as -x - 1 so that even -2**31 is computed without
           overflowing a 32-bit long. */
        return -(long)(~bits & 0x7FFFFFFFUL) - 1;
    }
    return (long)bits;
}
621
622/*
623 read_directory(archive) -> files dict (new reference)
624
625 Given a path to a Zip archive, build a dict, mapping file names
626 (local to the archive, using SEP as a separator) to toc entries.
627
628 A toc_entry is a tuple:
629
Fred Drakef5b7fd22005-11-11 19:34:56 +0000630 (__file__, # value to use for __file__, available for all files
631 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000632 data_size, # size of compressed data on disk
633 file_size, # size of decompressed data
634 file_offset, # offset of file header from start of archive
635 time, # mod time of file (in dos format)
636 date, # mod data of file (in dos format)
637 crc, # crc checksum of the data
638 )
639
640 Directories can be recognized by the trailing SEP in the name,
641 data_size and file_offset are 0.
642*/
643static PyObject *
644read_directory(char *archive)
645{
646 PyObject *files = NULL;
647 FILE *fp;
648 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000649 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000650 long i, l, count;
651 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000652 char path[MAXPATHLEN + 5];
653 char name[MAXPATHLEN + 5];
654 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000655 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000656
657 if (strlen(archive) > MAXPATHLEN) {
658 PyErr_SetString(PyExc_OverflowError,
659 "Zip path name is too long");
660 return NULL;
661 }
662 strcpy(path, archive);
663
664 fp = fopen(archive, "rb");
665 if (fp == NULL) {
666 PyErr_Format(ZipImportError, "can't open Zip file: "
667 "'%.200s'", archive);
668 return NULL;
669 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000670 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000671 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000672 if (fread(endof_central_dir, 1, 22, fp) != 22) {
673 fclose(fp);
674 PyErr_Format(ZipImportError, "can't read Zip file: "
675 "'%.200s'", archive);
676 return NULL;
677 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000678 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000679 /* Bad: End of Central Dir signature */
680 fclose(fp);
681 PyErr_Format(ZipImportError, "not a Zip file: "
682 "'%.200s'", archive);
683 return NULL;
684 }
685
Thomas Heller354e3d92003-07-22 18:10:15 +0000686 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000687 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000688 arc_offset = header_position - header_offset - header_size;
689 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000690
691 files = PyDict_New();
692 if (files == NULL)
693 goto error;
694
695 length = (long)strlen(path);
696 path[length] = SEP;
697
698 /* Start of Central Directory */
699 count = 0;
700 for (;;) {
701 PyObject *t;
702 int err;
703
704 fseek(fp, header_offset, 0); /* Start of file header */
705 l = PyMarshal_ReadLongFromFile(fp);
706 if (l != 0x02014B50)
707 break; /* Bad: Central Dir File Header */
708 fseek(fp, header_offset + 10, 0);
709 compress = PyMarshal_ReadShortFromFile(fp);
710 time = PyMarshal_ReadShortFromFile(fp);
711 date = PyMarshal_ReadShortFromFile(fp);
712 crc = PyMarshal_ReadLongFromFile(fp);
713 data_size = PyMarshal_ReadLongFromFile(fp);
714 file_size = PyMarshal_ReadLongFromFile(fp);
715 name_size = PyMarshal_ReadShortFromFile(fp);
716 header_size = 46 + name_size +
717 PyMarshal_ReadShortFromFile(fp) +
718 PyMarshal_ReadShortFromFile(fp);
719 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000720 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000721 if (name_size > MAXPATHLEN)
722 name_size = MAXPATHLEN;
723
724 p = name;
725 for (i = 0; i < name_size; i++) {
726 *p = (char)getc(fp);
727 if (*p == '/')
728 *p = SEP;
729 p++;
730 }
731 *p = 0; /* Add terminating null byte */
732 header_offset += header_size;
733
734 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
735
736 t = Py_BuildValue("siiiiiii", path, compress, data_size,
737 file_size, file_offset, time, date, crc);
738 if (t == NULL)
739 goto error;
740 err = PyDict_SetItemString(files, name, t);
741 Py_DECREF(t);
742 if (err != 0)
743 goto error;
744 count++;
745 }
746 fclose(fp);
747 if (Py_VerboseFlag)
748 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
749 count, archive);
750 return files;
751error:
752 fclose(fp);
753 Py_XDECREF(files);
754 return NULL;
755}
756
757/* Return the zlib.decompress function object, or NULL if zlib couldn't
758 be imported. The function is cached when found, so subsequent calls
759 don't import zlib again. Returns a *borrowed* reference.
760 XXX This makes zlib.decompress immortal. */
761static PyObject *
762get_decompress_func(void)
763{
764 static PyObject *decompress = NULL;
765
766 if (decompress == NULL) {
767 PyObject *zlib;
768 static int importing_zlib = 0;
769
770 if (importing_zlib != 0)
771 /* Someone has a zlib.py[co] in their Zip file;
772 let's avoid a stack overflow. */
773 return NULL;
774 importing_zlib = 1;
775 zlib = PyImport_ImportModule("zlib"); /* import zlib */
776 importing_zlib = 0;
777 if (zlib != NULL) {
778 decompress = PyObject_GetAttrString(zlib,
779 "decompress");
780 Py_DECREF(zlib);
781 }
782 else
783 PyErr_Clear();
784 if (Py_VerboseFlag)
785 PySys_WriteStderr("# zipimport: zlib %s\n",
786 zlib != NULL ? "available": "UNAVAILABLE");
787 }
788 return decompress;
789}
790
791/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
792 data as a new reference. */
793static PyObject *
794get_data(char *archive, PyObject *toc_entry)
795{
796 PyObject *raw_data, *data = NULL, *decompress;
797 char *buf;
798 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000799 int err;
800 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000801 long l;
802 char *datapath;
803 long compress, data_size, file_size, file_offset;
804 long time, date, crc;
805
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000806 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000807 &data_size, &file_size, &file_offset, &time,
808 &date, &crc)) {
809 return NULL;
810 }
811
812 fp = fopen(archive, "rb");
813 if (!fp) {
814 PyErr_Format(PyExc_IOError,
815 "zipimport: can not open file %s", archive);
816 return NULL;
817 }
818
819 /* Check to make sure the local file header is correct */
820 fseek(fp, file_offset, 0);
821 l = PyMarshal_ReadLongFromFile(fp);
822 if (l != 0x04034B50) {
823 /* Bad: Local File Header */
824 PyErr_Format(ZipImportError,
825 "bad local file header in %s",
826 archive);
827 fclose(fp);
828 return NULL;
829 }
830 fseek(fp, file_offset + 26, 0);
831 l = 30 + PyMarshal_ReadShortFromFile(fp) +
832 PyMarshal_ReadShortFromFile(fp); /* local header size */
833 file_offset += l; /* Start of file data */
834
835 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
836 data_size : data_size + 1);
837 if (raw_data == NULL) {
838 fclose(fp);
839 return NULL;
840 }
841 buf = PyString_AsString(raw_data);
842
843 err = fseek(fp, file_offset, 0);
844 if (err == 0)
845 bytes_read = fread(buf, 1, data_size, fp);
846 fclose(fp);
847 if (err || bytes_read != data_size) {
848 PyErr_SetString(PyExc_IOError,
849 "zipimport: can't read data");
850 Py_DECREF(raw_data);
851 return NULL;
852 }
853
854 if (compress != 0) {
855 buf[data_size] = 'Z'; /* saw this in zipfile.py */
856 data_size++;
857 }
858 buf[data_size] = '\0';
859
860 if (compress == 0) /* data is not compressed */
861 return raw_data;
862
863 /* Decompress with zlib */
864 decompress = get_decompress_func();
865 if (decompress == NULL) {
866 PyErr_SetString(ZipImportError,
867 "can't decompress data; "
868 "zlib not available");
869 goto error;
870 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000871 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000872error:
873 Py_DECREF(raw_data);
874 return data;
875}
876
/* Lenient comparison of two modification times: they count as equal
   when they differ by at most one second.  The mtime stored in the
   archive has a lower precision than the one stored in a .pyc
   (dostime only stores even seconds). */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t delta = t1 - t2;

    return (delta < 0 ? -delta : delta) <= 1;
}
889
890/* Given the contents of a .py[co] file in a buffer, unmarshal the data
891 and return the code object. Return None if it the magic word doesn't
892 match (we do this instead of raising an exception as we fall back
893 to .py if available and we don't want to mask other errors).
894 Returns a new reference. */
895static PyObject *
896unmarshal_code(char *pathname, PyObject *data, time_t mtime)
897{
898 PyObject *code;
899 char *buf = PyString_AsString(data);
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000900 Py_ssize_t size = PyString_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000901
902 if (size <= 9) {
903 PyErr_SetString(ZipImportError,
904 "bad pyc data");
905 return NULL;
906 }
907
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000908 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000909 if (Py_VerboseFlag)
910 PySys_WriteStderr("# %s has bad magic\n",
911 pathname);
912 Py_INCREF(Py_None);
913 return Py_None; /* signal caller to try alternative */
914 }
915
Just van Rossum9a3129c2003-01-03 11:18:56 +0000916 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
917 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000918 if (Py_VerboseFlag)
919 PySys_WriteStderr("# %s has bad mtime\n",
920 pathname);
921 Py_INCREF(Py_None);
922 return Py_None; /* signal caller to try alternative */
923 }
924
925 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
926 if (code == NULL)
927 return NULL;
928 if (!PyCode_Check(code)) {
929 Py_DECREF(code);
930 PyErr_Format(PyExc_TypeError,
931 "compiled module %.200s is not a code object",
932 pathname);
933 return NULL;
934 }
935 return code;
936}
937
938/* Replace any occurances of "\r\n?" in the input string with "\n".
939 This converts DOS and Mac line endings to Unix line endings.
940 Also append a trailing "\n" to be compatible with
941 PyParser_SimpleParseFile(). Returns a new reference. */
942static PyObject *
943normalize_line_endings(PyObject *source)
944{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000945 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000946 PyObject *fixed_source;
947
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000948 if (!p)
949 return NULL;
950
Just van Rossum9a3129c2003-01-03 11:18:56 +0000951 /* one char extra for trailing \n and one for terminating \0 */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000952 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000953 if (buf == NULL) {
954 PyErr_SetString(PyExc_MemoryError,
955 "zipimport: no memory to allocate "
956 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000957 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000959 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000960 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000961 if (*p == '\r') {
962 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000963 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000965 }
966 else
967 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000968 }
969 *q++ = '\n'; /* add trailing \n */
970 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000971 fixed_source = PyString_FromString(buf);
972 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 return fixed_source;
974}
975
976/* Given a string buffer containing Python source code, compile it
977 return and return a code object as a new reference. */
978static PyObject *
979compile_source(char *pathname, PyObject *source)
980{
981 PyObject *code, *fixed_source;
982
983 fixed_source = normalize_line_endings(source);
984 if (fixed_source == NULL)
985 return NULL;
986
987 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
988 Py_file_input);
989 Py_DECREF(fixed_source);
990 return code;
991}
992
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.

   'dostime' holds seconds/2 in bits 0-4, minutes in bits 5-10 and
   hours in bits 11-15.  'dosdate' holds the day of the month in bits
   0-4, the month (1-12) in bits 5-8 and the number of years since 1980
   in bits 9-15.  The broken-down time is handed to mktime(), so the
   return value is whatever mktime() yields for it (including
   (time_t)-1 if the time cannot be represented). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    /* Start from an all-zero struct: some platforms add members to
       struct tm beyond the standard ones, and those must not reach
       mktime() with indeterminate values. */
    struct tm stm = {0};

    stm.tm_sec = (dostime & 0x1f) * 2;      /* stored in 2-second units */
    stm.tm_min = (dostime >> 5) & 0x3f;
    stm.tm_hour = (dostime >> 11) & 0x1f;
    stm.tm_mday = dosdate & 0x1f;
    stm.tm_mon = ((dosdate >> 5) & 0x0f) - 1;   /* tm_mon is 0-based */
    stm.tm_year = ((dosdate >> 9) & 0x7f) + 80; /* 1980-based -> 1900-based */
    stm.tm_isdst = -1; /* let mktime() decide; wday/yday are ignored */

    return mktime(&stm);
}
1010
1011/* Given a path to a .pyc or .pyo file in the archive, return the
1012 modifictaion time of the matching .py file, or 0 if no source
1013 is available. */
1014static time_t
1015get_mtime_of_source(ZipImporter *self, char *path)
1016{
1017 PyObject *toc_entry;
1018 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001019 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001020 char savechar = path[lastchar];
1021 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1022 toc_entry = PyDict_GetItemString(self->files, path);
1023 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1024 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001025 /* fetch the time stamp of the .py file for comparison
1026 with an embedded pyc time stamp */
1027 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001028 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1029 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1030 mtime = parse_dostime(time, date);
1031 }
1032 path[lastchar] = savechar;
1033 return mtime;
1034}
1035
1036/* Return the code object for the module named by 'fullname' from the
1037 Zip archive as a new reference. */
1038static PyObject *
1039get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1040 time_t mtime, PyObject *toc_entry)
1041{
1042 PyObject *data, *code;
1043 char *modpath;
1044 char *archive = PyString_AsString(self->archive);
1045
1046 if (archive == NULL)
1047 return NULL;
1048
1049 data = get_data(archive, toc_entry);
1050 if (data == NULL)
1051 return NULL;
1052
1053 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1054
1055 if (isbytecode) {
1056 code = unmarshal_code(modpath, data, mtime);
1057 }
1058 else {
1059 code = compile_source(modpath, data);
1060 }
1061 Py_DECREF(data);
1062 return code;
1063}
1064
1065/* Get the code object assoiciated with the module specified by
1066 'fullname'. */
1067static PyObject *
1068get_module_code(ZipImporter *self, char *fullname,
1069 int *p_ispackage, char **p_modpath)
1070{
1071 PyObject *toc_entry;
1072 char *subname, path[MAXPATHLEN + 1];
1073 int len;
1074 struct st_zip_searchorder *zso;
1075
1076 subname = get_subname(fullname);
1077
1078 len = make_filename(PyString_AsString(self->prefix), subname, path);
1079 if (len < 0)
1080 return NULL;
1081
1082 for (zso = zip_searchorder; *zso->suffix; zso++) {
1083 PyObject *code = NULL;
1084
1085 strcpy(path + len, zso->suffix);
1086 if (Py_VerboseFlag > 1)
1087 PySys_WriteStderr("# trying %s%c%s\n",
1088 PyString_AsString(self->archive),
1089 SEP, path);
1090 toc_entry = PyDict_GetItemString(self->files, path);
1091 if (toc_entry != NULL) {
1092 time_t mtime = 0;
1093 int ispackage = zso->type & IS_PACKAGE;
1094 int isbytecode = zso->type & IS_BYTECODE;
1095
1096 if (isbytecode)
1097 mtime = get_mtime_of_source(self, path);
1098 if (p_ispackage != NULL)
1099 *p_ispackage = ispackage;
1100 code = get_code_from_data(self, ispackage,
1101 isbytecode, mtime,
1102 toc_entry);
1103 if (code == Py_None) {
1104 /* bad magic number or non-matching mtime
1105 in byte code, try next */
1106 Py_DECREF(code);
1107 continue;
1108 }
1109 if (code != NULL && p_modpath != NULL)
1110 *p_modpath = PyString_AsString(
1111 PyTuple_GetItem(toc_entry, 0));
1112 return code;
1113 }
1114 }
1115 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1116 return NULL;
1117}
1118
1119
1120/* Module init */
1121
1122PyDoc_STRVAR(zipimport_doc,
1123"zipimport provides support for importing Python modules from Zip archives.\n\
1124\n\
1125This module exports three objects:\n\
1126- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
Fredrik Lundhb84b35f2006-01-15 15:00:40 +00001127- ZipImportError: exception raised by zipimporter objects. It's a\n\
Just van Rossum52e14d62002-12-30 22:08:05 +00001128 subclass of ImportError, so it can be caught as ImportError, too.\n\
1129- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
1130 info dicts, as used in zipimporter._files.\n\
1131\n\
1132It is usually not needed to use the zipimport module explicitly; it is\n\
1133used by the builtin import mechanism for sys.path items that are paths\n\
1134to Zip archives.");
1135
1136PyMODINIT_FUNC
1137initzipimport(void)
1138{
1139 PyObject *mod;
1140
1141 if (PyType_Ready(&ZipImporter_Type) < 0)
1142 return;
1143
1144 /* Correct directory separator */
1145 zip_searchorder[0].suffix[0] = SEP;
1146 zip_searchorder[1].suffix[0] = SEP;
1147 zip_searchorder[2].suffix[0] = SEP;
1148 if (Py_OptimizeFlag) {
1149 /* Reverse *.pyc and *.pyo */
1150 struct st_zip_searchorder tmp;
1151 tmp = zip_searchorder[0];
1152 zip_searchorder[0] = zip_searchorder[1];
1153 zip_searchorder[1] = tmp;
1154 tmp = zip_searchorder[3];
1155 zip_searchorder[3] = zip_searchorder[4];
1156 zip_searchorder[4] = tmp;
1157 }
1158
1159 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1160 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001161 if (mod == NULL)
1162 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001163
1164 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1165 PyExc_ImportError, NULL);
1166 if (ZipImportError == NULL)
1167 return;
1168
1169 Py_INCREF(ZipImportError);
1170 if (PyModule_AddObject(mod, "ZipImportError",
1171 ZipImportError) < 0)
1172 return;
1173
1174 Py_INCREF(&ZipImporter_Type);
1175 if (PyModule_AddObject(mod, "zipimporter",
1176 (PyObject *)&ZipImporter_Type) < 0)
1177 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001178
Just van Rossum52e14d62002-12-30 22:08:05 +00001179 zip_directory_cache = PyDict_New();
1180 if (zip_directory_cache == NULL)
1181 return;
1182 Py_INCREF(zip_directory_cache);
1183 if (PyModule_AddObject(mod, "_zip_directory_cache",
1184 zip_directory_cache) < 0)
1185 return;
1186}