blob: 69b28813c1d00786abe562d121df7f8dfc89e248 [file] [log] [blame]
Just van Rossum52e14d62002-12-30 22:08:05 +00001#include "Python.h"
2#include "structmember.h"
3#include "osdefs.h"
4#include "marshal.h"
Just van Rossum52e14d62002-12-30 22:08:05 +00005#include <time.h>
6
7
8#define IS_SOURCE 0x0
9#define IS_BYTECODE 0x1
10#define IS_PACKAGE 0x2
11
/* One entry of the module search table: a file name suffix to try and
   the IS_* flags describing the kind of entry that suffix denotes. */
struct st_zip_searchorder {
    char suffix[14];    /* suffix string; "" marks the end of the table */
    int type;           /* bitwise OR of the IS_* flags */
};
16
17/* zip_searchorder defines how we search for a module in the Zip
18 archive: we first search for a package __init__, then for
19 non-package .pyc, .pyo and .py entries. The .pyc and .pyo entries
20 are swapped by initzipimport() if we run in optimized mode. Also,
21 '/' is replaced by SEP there. */
Neal Norwitz29fd2ba2003-03-23 13:21:03 +000022static struct st_zip_searchorder zip_searchorder[] = {
Just van Rossum52e14d62002-12-30 22:08:05 +000023 {"/__init__.pyc", IS_PACKAGE | IS_BYTECODE},
24 {"/__init__.pyo", IS_PACKAGE | IS_BYTECODE},
25 {"/__init__.py", IS_PACKAGE | IS_SOURCE},
26 {".pyc", IS_BYTECODE},
27 {".pyo", IS_BYTECODE},
28 {".py", IS_SOURCE},
29 {"", 0}
30};
31
32/* zipimporter object definition and support */
33
34typedef struct _zipimporter ZipImporter;
35
36struct _zipimporter {
37 PyObject_HEAD
38 PyObject *archive; /* pathname of the Zip archive */
39 PyObject *prefix; /* file prefix: "a/sub/directory/" */
40 PyObject *files; /* dict with file info {path: toc_entry} */
41};
42
Just van Rossum52e14d62002-12-30 22:08:05 +000043static PyObject *ZipImportError;
44static PyObject *zip_directory_cache = NULL;
45
46/* forward decls */
47static PyObject *read_directory(char *archive);
48static PyObject *get_data(char *archive, PyObject *toc_entry);
49static PyObject *get_module_code(ZipImporter *self, char *fullname,
50 int *p_ispackage, char **p_modpath);
51
52
53#define ZipImporter_Check(op) PyObject_TypeCheck(op, &ZipImporter_Type)
54
55
56/* zipimporter.__init__
57 Split the "subdirectory" from the Zip archive path, lookup a matching
58 entry in sys.path_importer_cache, fetch the file directory from there
59 if found, or else read it from the archive. */
60static int
61zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
62{
63 char *path, *p, *prefix, buf[MAXPATHLEN+2];
Neal Norwitzd39d8612006-01-08 01:03:36 +000064 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +000065
Georg Brandl02c42872005-08-26 06:42:30 +000066 if (!_PyArg_NoKeywords("zipimporter()", kwds))
67 return -1;
68
Just van Rossum52e14d62002-12-30 22:08:05 +000069 if (!PyArg_ParseTuple(args, "s:zipimporter",
70 &path))
71 return -1;
72
73 len = strlen(path);
74 if (len == 0) {
75 PyErr_SetString(ZipImportError, "archive path is empty");
76 return -1;
77 }
78 if (len >= MAXPATHLEN) {
79 PyErr_SetString(ZipImportError,
80 "archive path too long");
81 return -1;
82 }
83 strcpy(buf, path);
84
85#ifdef ALTSEP
86 for (p = buf; *p; p++) {
87 if (*p == ALTSEP)
88 *p = SEP;
89 }
90#endif
91
92 path = NULL;
93 prefix = NULL;
94 for (;;) {
Martin v. Löwisa94568a2003-05-10 07:36:56 +000095#ifndef RISCOS
Just van Rossum52e14d62002-12-30 22:08:05 +000096 struct stat statbuf;
97 int rv;
98
99 rv = stat(buf, &statbuf);
100 if (rv == 0) {
101 /* it exists */
102 if (S_ISREG(statbuf.st_mode))
103 /* it's a file */
104 path = buf;
105 break;
106 }
Martin v. Löwisa94568a2003-05-10 07:36:56 +0000107#else
108 if (object_exists(buf)) {
109 /* it exists */
110 if (isfile(buf))
111 /* it's a file */
112 path = buf;
113 break;
114 }
115#endif
Just van Rossum52e14d62002-12-30 22:08:05 +0000116 /* back up one path element */
Just van Rossumd35c6db2003-01-02 12:55:48 +0000117 p = strrchr(buf, SEP);
Just van Rossum52e14d62002-12-30 22:08:05 +0000118 if (prefix != NULL)
119 *prefix = SEP;
120 if (p == NULL)
121 break;
122 *p = '\0';
123 prefix = p;
124 }
125 if (path != NULL) {
126 PyObject *files;
127 files = PyDict_GetItemString(zip_directory_cache, path);
128 if (files == NULL) {
129 files = read_directory(buf);
130 if (files == NULL)
131 return -1;
132 if (PyDict_SetItemString(zip_directory_cache, path,
133 files) != 0)
134 return -1;
135 }
136 else
137 Py_INCREF(files);
138 self->files = files;
139 }
140 else {
141 PyErr_SetString(ZipImportError, "not a Zip file");
142 return -1;
143 }
144
145 if (prefix == NULL)
146 prefix = "";
147 else {
148 prefix++;
149 len = strlen(prefix);
150 if (prefix[len-1] != SEP) {
151 /* add trailing SEP */
152 prefix[len] = SEP;
153 prefix[len + 1] = '\0';
154 }
155 }
156
157 self->archive = PyString_FromString(buf);
158 if (self->archive == NULL)
159 return -1;
160
161 self->prefix = PyString_FromString(prefix);
162 if (self->prefix == NULL)
163 return -1;
164
165 return 0;
166}
167
168/* GC support. */
169static int
170zipimporter_traverse(PyObject *obj, visitproc visit, void *arg)
171{
172 ZipImporter *self = (ZipImporter *)obj;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000173 Py_VISIT(self->files);
Just van Rossum52e14d62002-12-30 22:08:05 +0000174 return 0;
175}
176
177static void
178zipimporter_dealloc(ZipImporter *self)
179{
180 PyObject_GC_UnTrack(self);
181 Py_XDECREF(self->archive);
Just van Rossumf8b6de12002-12-31 09:51:59 +0000182 Py_XDECREF(self->prefix);
Just van Rossum52e14d62002-12-30 22:08:05 +0000183 Py_XDECREF(self->files);
184 self->ob_type->tp_free((PyObject *)self);
185}
186
187static PyObject *
188zipimporter_repr(ZipImporter *self)
189{
190 char buf[500];
191 char *archive = "???";
192 char *prefix = "";
193
194 if (self->archive != NULL && PyString_Check(self->archive))
195 archive = PyString_AsString(self->archive);
196 if (self->prefix != NULL && PyString_Check(self->prefix))
197 prefix = PyString_AsString(self->prefix);
198 if (prefix != NULL && *prefix)
199 PyOS_snprintf(buf, sizeof(buf),
200 "<zipimporter object \"%.300s%c%.150s\">",
201 archive, SEP, prefix);
202 else
203 PyOS_snprintf(buf, sizeof(buf),
204 "<zipimporter object \"%.300s\">",
205 archive);
206 return PyString_FromString(buf);
207}
208
/* Return fullname.split(".")[-1]: a pointer to the part of 'fullname'
   after its last '.', or 'fullname' itself if it contains no dot. */
static char *
get_subname(char *fullname)
{
    char *last_dot = strrchr(fullname, '.');

    return (last_dot != NULL) ? last_dot + 1 : fullname;
}
220
221/* Given a (sub)modulename, write the potential file path in the
222 archive (without extension) to the path buffer. Return the
223 length of the resulting string. */
224static int
225make_filename(char *prefix, char *name, char *path)
226{
Neal Norwitzd39d8612006-01-08 01:03:36 +0000227 size_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000228 char *p;
229
230 len = strlen(prefix);
231
232 /* self.prefix + name [+ SEP + "__init__"] + ".py[co]" */
233 if (len + strlen(name) + 13 >= MAXPATHLEN) {
234 PyErr_SetString(ZipImportError, "path too long");
235 return -1;
236 }
237
238 strcpy(path, prefix);
239 strcpy(path + len, name);
240 for (p = path + len; *p; p++) {
241 if (*p == '.')
242 *p = SEP;
243 }
244 len += strlen(name);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000245 assert(len < INT_MAX);
Neal Norwitzd39d8612006-01-08 01:03:36 +0000246 return (int)len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000247}
248
/* Result codes of get_module_info(). */
enum zi_module_info {
    MI_ERROR,       /* lookup failed; an exception is set */
    MI_NOT_FOUND,   /* no matching entry in the archive */
    MI_MODULE,      /* found as a plain module */
    MI_PACKAGE      /* found as a package (__init__ entry) */
};
255
256/* Return some information about a module. */
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000257static enum zi_module_info
Just van Rossum52e14d62002-12-30 22:08:05 +0000258get_module_info(ZipImporter *self, char *fullname)
259{
260 char *subname, path[MAXPATHLEN + 1];
261 int len;
262 struct st_zip_searchorder *zso;
263
264 subname = get_subname(fullname);
265
266 len = make_filename(PyString_AsString(self->prefix), subname, path);
267 if (len < 0)
268 return MI_ERROR;
269
270 for (zso = zip_searchorder; *zso->suffix; zso++) {
271 strcpy(path + len, zso->suffix);
272 if (PyDict_GetItemString(self->files, path) != NULL) {
273 if (zso->type & IS_PACKAGE)
274 return MI_PACKAGE;
275 else
276 return MI_MODULE;
277 }
278 }
279 return MI_NOT_FOUND;
280}
281
282/* Check whether we can satisfy the import of the module named by
283 'fullname'. Return self if we can, None if we can't. */
284static PyObject *
285zipimporter_find_module(PyObject *obj, PyObject *args)
286{
287 ZipImporter *self = (ZipImporter *)obj;
288 PyObject *path = NULL;
289 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000290 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000291
292 if (!PyArg_ParseTuple(args, "s|O:zipimporter.find_module",
293 &fullname, &path))
294 return NULL;
295
296 mi = get_module_info(self, fullname);
297 if (mi == MI_ERROR)
298 return NULL;
299 if (mi == MI_NOT_FOUND) {
300 Py_INCREF(Py_None);
301 return Py_None;
302 }
303 Py_INCREF(self);
304 return (PyObject *)self;
305}
306
307/* Load and return the module named by 'fullname'. */
308static PyObject *
309zipimporter_load_module(PyObject *obj, PyObject *args)
310{
311 ZipImporter *self = (ZipImporter *)obj;
312 PyObject *code, *mod, *dict;
313 char *fullname, *modpath;
314 int ispackage;
315
316 if (!PyArg_ParseTuple(args, "s:zipimporter.load_module",
317 &fullname))
318 return NULL;
319
320 code = get_module_code(self, fullname, &ispackage, &modpath);
321 if (code == NULL)
322 return NULL;
323
324 mod = PyImport_AddModule(fullname);
325 if (mod == NULL) {
326 Py_DECREF(code);
327 return NULL;
328 }
329 dict = PyModule_GetDict(mod);
330
331 /* mod.__loader__ = self */
332 if (PyDict_SetItemString(dict, "__loader__", (PyObject *)self) != 0)
333 goto error;
334
335 if (ispackage) {
336 /* add __path__ to the module *before* the code gets
337 executed */
338 PyObject *pkgpath, *fullpath;
339 char *prefix = PyString_AsString(self->prefix);
340 char *subname = get_subname(fullname);
341 int err;
342
343 fullpath = PyString_FromFormat("%s%c%s%s",
344 PyString_AsString(self->archive),
345 SEP,
346 *prefix ? prefix : "",
347 subname);
348 if (fullpath == NULL)
349 goto error;
350
351 pkgpath = Py_BuildValue("[O]", fullpath);
352 Py_DECREF(fullpath);
353 if (pkgpath == NULL)
354 goto error;
355 err = PyDict_SetItemString(dict, "__path__", pkgpath);
356 Py_DECREF(pkgpath);
357 if (err != 0)
358 goto error;
359 }
360 mod = PyImport_ExecCodeModuleEx(fullname, code, modpath);
361 Py_DECREF(code);
362 if (Py_VerboseFlag)
363 PySys_WriteStderr("import %s # loaded from Zip %s\n",
364 fullname, modpath);
365 return mod;
366error:
367 Py_DECREF(code);
368 Py_DECREF(mod);
369 return NULL;
370}
371
372/* Return a bool signifying whether the module is a package or not. */
373static PyObject *
374zipimporter_is_package(PyObject *obj, PyObject *args)
375{
376 ZipImporter *self = (ZipImporter *)obj;
377 char *fullname;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000378 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000379
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000380 if (!PyArg_ParseTuple(args, "s:zipimporter.is_package",
Just van Rossum52e14d62002-12-30 22:08:05 +0000381 &fullname))
382 return NULL;
383
384 mi = get_module_info(self, fullname);
385 if (mi == MI_ERROR)
386 return NULL;
387 if (mi == MI_NOT_FOUND) {
388 PyErr_Format(ZipImportError, "can't find module '%.200s'",
389 fullname);
390 return NULL;
391 }
392 return PyBool_FromLong(mi == MI_PACKAGE);
393}
394
395static PyObject *
396zipimporter_get_data(PyObject *obj, PyObject *args)
397{
398 ZipImporter *self = (ZipImporter *)obj;
399 char *path;
400#ifdef ALTSEP
Tim Peters1ea93f22002-12-30 22:42:57 +0000401 char *p, buf[MAXPATHLEN + 1];
Just van Rossum52e14d62002-12-30 22:08:05 +0000402#endif
403 PyObject *toc_entry;
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000404 Py_ssize_t len;
Just van Rossum52e14d62002-12-30 22:08:05 +0000405
406 if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path))
407 return NULL;
408
409#ifdef ALTSEP
410 if (strlen(path) >= MAXPATHLEN) {
411 PyErr_SetString(ZipImportError, "path too long");
412 return NULL;
413 }
414 strcpy(buf, path);
415 for (p = buf; *p; p++) {
416 if (*p == ALTSEP)
417 *p = SEP;
418 }
419 path = buf;
420#endif
421 len = PyString_Size(self->archive);
Tim Petersf271c272002-12-30 22:44:03 +0000422 if ((size_t)len < strlen(path) &&
Just van Rossum52e14d62002-12-30 22:08:05 +0000423 strncmp(path, PyString_AsString(self->archive), len) == 0 &&
424 path[len] == SEP) {
425 path = path + len + 1;
426 }
427
428 toc_entry = PyDict_GetItemString(self->files, path);
429 if (toc_entry == NULL) {
Georg Brandle9b19492006-02-19 09:38:58 +0000430 PyErr_SetFromErrnoWithFilename(PyExc_IOError, path);
Just van Rossum52e14d62002-12-30 22:08:05 +0000431 return NULL;
432 }
433 return get_data(PyString_AsString(self->archive), toc_entry);
434}
435
436static PyObject *
437zipimporter_get_code(PyObject *obj, PyObject *args)
438{
439 ZipImporter *self = (ZipImporter *)obj;
440 char *fullname;
441
442 if (!PyArg_ParseTuple(args, "s:zipimporter.get_code", &fullname))
443 return NULL;
444
445 return get_module_code(self, fullname, NULL, NULL);
446}
447
448static PyObject *
449zipimporter_get_source(PyObject *obj, PyObject *args)
450{
451 ZipImporter *self = (ZipImporter *)obj;
452 PyObject *toc_entry;
453 char *fullname, *subname, path[MAXPATHLEN+1];
454 int len;
Raymond Hettinger2c45c9a2004-11-10 13:08:35 +0000455 enum zi_module_info mi;
Just van Rossum52e14d62002-12-30 22:08:05 +0000456
457 if (!PyArg_ParseTuple(args, "s:zipimporter.get_source", &fullname))
458 return NULL;
459
460 mi = get_module_info(self, fullname);
461 if (mi == MI_ERROR)
462 return NULL;
463 if (mi == MI_NOT_FOUND) {
464 PyErr_Format(ZipImportError, "can't find module '%.200s'",
465 fullname);
466 return NULL;
467 }
468 subname = get_subname(fullname);
469
470 len = make_filename(PyString_AsString(self->prefix), subname, path);
471 if (len < 0)
472 return NULL;
473
474 if (mi == MI_PACKAGE) {
475 path[len] = SEP;
476 strcpy(path + len + 1, "__init__.py");
477 }
478 else
479 strcpy(path + len, ".py");
480
481 toc_entry = PyDict_GetItemString(self->files, path);
482 if (toc_entry != NULL)
483 return get_data(PyString_AsString(self->archive), toc_entry);
484
485 /* we have the module, but no source */
486 Py_INCREF(Py_None);
487 return Py_None;
488}
489
490PyDoc_STRVAR(doc_find_module,
491"find_module(fullname, path=None) -> self or None.\n\
492\n\
493Search for a module specified by 'fullname'. 'fullname' must be the\n\
494fully qualified (dotted) module name. It returns the zipimporter\n\
495instance itself if the module was found, or None if it wasn't.\n\
496The optional 'path' argument is ignored -- it's there for compatibility\n\
497with the importer protocol.");
498
499PyDoc_STRVAR(doc_load_module,
500"load_module(fullname) -> module.\n\
501\n\
502Load the module specified by 'fullname'. 'fullname' must be the\n\
503fully qualified (dotted) module name. It returns the imported\n\
504module, or raises ZipImportError if it wasn't found.");
505
506PyDoc_STRVAR(doc_get_data,
507"get_data(pathname) -> string with file data.\n\
508\n\
509Return the data associated with 'pathname'. Raise IOError if\n\
510the file wasn't found.");
511
512PyDoc_STRVAR(doc_is_package,
513"is_package(fullname) -> bool.\n\
514\n\
515Return True if the module specified by fullname is a package.\n\
516Raise ZipImportError is the module couldn't be found.");
517
518PyDoc_STRVAR(doc_get_code,
519"get_code(fullname) -> code object.\n\
520\n\
521Return the code object for the specified module. Raise ZipImportError\n\
522is the module couldn't be found.");
523
524PyDoc_STRVAR(doc_get_source,
525"get_source(fullname) -> source string.\n\
526\n\
527Return the source code for the specified module. Raise ZipImportError\n\
528is the module couldn't be found, return None if the archive does\n\
529contain the module, but has no source for it.");
530
531static PyMethodDef zipimporter_methods[] = {
532 {"find_module", zipimporter_find_module, METH_VARARGS,
533 doc_find_module},
534 {"load_module", zipimporter_load_module, METH_VARARGS,
535 doc_load_module},
536 {"get_data", zipimporter_get_data, METH_VARARGS,
537 doc_get_data},
538 {"get_code", zipimporter_get_code, METH_VARARGS,
539 doc_get_code},
540 {"get_source", zipimporter_get_source, METH_VARARGS,
541 doc_get_source},
542 {"is_package", zipimporter_is_package, METH_VARARGS,
543 doc_is_package},
544 {NULL, NULL} /* sentinel */
545};
546
547static PyMemberDef zipimporter_members[] = {
548 {"archive", T_OBJECT, offsetof(ZipImporter, archive), READONLY},
549 {"prefix", T_OBJECT, offsetof(ZipImporter, prefix), READONLY},
550 {"_files", T_OBJECT, offsetof(ZipImporter, files), READONLY},
551 {NULL}
552};
553
554PyDoc_STRVAR(zipimporter_doc,
555"zipimporter(archivepath) -> zipimporter object\n\
556\n\
557Create a new zipimporter instance. 'archivepath' must be a path to\n\
558a zipfile. ZipImportError is raised if 'archivepath' doesn't point to\n\
559a valid Zip archive.");
560
561#define DEFERRED_ADDRESS(ADDR) 0
562
563static PyTypeObject ZipImporter_Type = {
564 PyObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type))
565 0,
566 "zipimport.zipimporter",
567 sizeof(ZipImporter),
568 0, /* tp_itemsize */
569 (destructor)zipimporter_dealloc, /* tp_dealloc */
570 0, /* tp_print */
571 0, /* tp_getattr */
572 0, /* tp_setattr */
573 0, /* tp_compare */
574 (reprfunc)zipimporter_repr, /* tp_repr */
575 0, /* tp_as_number */
576 0, /* tp_as_sequence */
577 0, /* tp_as_mapping */
578 0, /* tp_hash */
579 0, /* tp_call */
580 0, /* tp_str */
581 PyObject_GenericGetAttr, /* tp_getattro */
582 0, /* tp_setattro */
583 0, /* tp_as_buffer */
584 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
585 Py_TPFLAGS_HAVE_GC, /* tp_flags */
586 zipimporter_doc, /* tp_doc */
587 zipimporter_traverse, /* tp_traverse */
588 0, /* tp_clear */
589 0, /* tp_richcompare */
590 0, /* tp_weaklistoffset */
591 0, /* tp_iter */
592 0, /* tp_iternext */
593 zipimporter_methods, /* tp_methods */
594 zipimporter_members, /* tp_members */
595 0, /* tp_getset */
596 0, /* tp_base */
597 0, /* tp_dict */
598 0, /* tp_descr_get */
599 0, /* tp_descr_set */
600 0, /* tp_dictoffset */
601 (initproc)zipimporter_init, /* tp_init */
602 PyType_GenericAlloc, /* tp_alloc */
603 PyType_GenericNew, /* tp_new */
604 PyObject_GC_Del, /* tp_free */
605};
606
607
608/* implementation */
609
/* Given a buffer, return the long that is represented by the first
   4 bytes, encoded as little endian.  This partially reimplements
   marshal.c:r_long().

   The bytes are assembled in an unsigned long: shifting a byte >= 0x80
   left by 24 bits in a (32-bit) signed long would overflow, which is
   undefined behaviour.  Where long is wider than 32 bits the value is
   sign-extended from bit 31, exactly as before. */
static long
get_long(unsigned char *buf) {
    unsigned long bits;

    bits = (unsigned long)buf[0]
         | ((unsigned long)buf[1] << 8)
         | ((unsigned long)buf[2] << 16)
         | ((unsigned long)buf[3] << 24);
#if SIZEOF_LONG > 4
    /* Sign extension for 64-bit machines */
    if (bits & 0x80000000UL)
        bits |= ~0xFFFFFFFFUL;
#endif
    return (long)bits;
}
626
627/*
628 read_directory(archive) -> files dict (new reference)
629
630 Given a path to a Zip archive, build a dict, mapping file names
631 (local to the archive, using SEP as a separator) to toc entries.
632
633 A toc_entry is a tuple:
634
Fred Drakef5b7fd22005-11-11 19:34:56 +0000635 (__file__, # value to use for __file__, available for all files
636 compress, # compression kind; 0 for uncompressed
Just van Rossum52e14d62002-12-30 22:08:05 +0000637 data_size, # size of compressed data on disk
638 file_size, # size of decompressed data
639 file_offset, # offset of file header from start of archive
640 time, # mod time of file (in dos format)
641 date, # mod data of file (in dos format)
642 crc, # crc checksum of the data
643 )
644
645 Directories can be recognized by the trailing SEP in the name,
646 data_size and file_offset are 0.
647*/
648static PyObject *
649read_directory(char *archive)
650{
651 PyObject *files = NULL;
652 FILE *fp;
653 long compress, crc, data_size, file_size, file_offset, date, time;
Thomas Heller354e3d92003-07-22 18:10:15 +0000654 long header_offset, name_size, header_size, header_position;
Neal Norwitzd39d8612006-01-08 01:03:36 +0000655 long i, l, count;
656 size_t length;
Just van Rossum52e14d62002-12-30 22:08:05 +0000657 char path[MAXPATHLEN + 5];
658 char name[MAXPATHLEN + 5];
659 char *p, endof_central_dir[22];
Thomas Heller354e3d92003-07-22 18:10:15 +0000660 long arc_offset; /* offset from beginning of file to start of zip-archive */
Just van Rossum52e14d62002-12-30 22:08:05 +0000661
662 if (strlen(archive) > MAXPATHLEN) {
663 PyErr_SetString(PyExc_OverflowError,
664 "Zip path name is too long");
665 return NULL;
666 }
667 strcpy(path, archive);
668
669 fp = fopen(archive, "rb");
670 if (fp == NULL) {
671 PyErr_Format(ZipImportError, "can't open Zip file: "
672 "'%.200s'", archive);
673 return NULL;
674 }
Just van Rossumf4ecc752003-02-28 08:54:01 +0000675 fseek(fp, -22, SEEK_END);
Thomas Heller354e3d92003-07-22 18:10:15 +0000676 header_position = ftell(fp);
Just van Rossum52e14d62002-12-30 22:08:05 +0000677 if (fread(endof_central_dir, 1, 22, fp) != 22) {
678 fclose(fp);
679 PyErr_Format(ZipImportError, "can't read Zip file: "
680 "'%.200s'", archive);
681 return NULL;
682 }
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000683 if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000684 /* Bad: End of Central Dir signature */
685 fclose(fp);
686 PyErr_Format(ZipImportError, "not a Zip file: "
687 "'%.200s'", archive);
688 return NULL;
689 }
690
Thomas Heller354e3d92003-07-22 18:10:15 +0000691 header_size = get_long((unsigned char *)endof_central_dir + 12);
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000692 header_offset = get_long((unsigned char *)endof_central_dir + 16);
Thomas Heller354e3d92003-07-22 18:10:15 +0000693 arc_offset = header_position - header_offset - header_size;
694 header_offset += arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000695
696 files = PyDict_New();
697 if (files == NULL)
698 goto error;
699
700 length = (long)strlen(path);
701 path[length] = SEP;
702
703 /* Start of Central Directory */
704 count = 0;
705 for (;;) {
706 PyObject *t;
707 int err;
708
709 fseek(fp, header_offset, 0); /* Start of file header */
710 l = PyMarshal_ReadLongFromFile(fp);
711 if (l != 0x02014B50)
712 break; /* Bad: Central Dir File Header */
713 fseek(fp, header_offset + 10, 0);
714 compress = PyMarshal_ReadShortFromFile(fp);
715 time = PyMarshal_ReadShortFromFile(fp);
716 date = PyMarshal_ReadShortFromFile(fp);
717 crc = PyMarshal_ReadLongFromFile(fp);
718 data_size = PyMarshal_ReadLongFromFile(fp);
719 file_size = PyMarshal_ReadLongFromFile(fp);
720 name_size = PyMarshal_ReadShortFromFile(fp);
721 header_size = 46 + name_size +
722 PyMarshal_ReadShortFromFile(fp) +
723 PyMarshal_ReadShortFromFile(fp);
724 fseek(fp, header_offset + 42, 0);
Thomas Heller354e3d92003-07-22 18:10:15 +0000725 file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
Just van Rossum52e14d62002-12-30 22:08:05 +0000726 if (name_size > MAXPATHLEN)
727 name_size = MAXPATHLEN;
728
729 p = name;
730 for (i = 0; i < name_size; i++) {
731 *p = (char)getc(fp);
732 if (*p == '/')
733 *p = SEP;
734 p++;
735 }
736 *p = 0; /* Add terminating null byte */
737 header_offset += header_size;
738
739 strncpy(path + length + 1, name, MAXPATHLEN - length - 1);
740
741 t = Py_BuildValue("siiiiiii", path, compress, data_size,
742 file_size, file_offset, time, date, crc);
743 if (t == NULL)
744 goto error;
745 err = PyDict_SetItemString(files, name, t);
746 Py_DECREF(t);
747 if (err != 0)
748 goto error;
749 count++;
750 }
751 fclose(fp);
752 if (Py_VerboseFlag)
753 PySys_WriteStderr("# zipimport: found %ld names in %s\n",
754 count, archive);
755 return files;
756error:
757 fclose(fp);
758 Py_XDECREF(files);
759 return NULL;
760}
761
762/* Return the zlib.decompress function object, or NULL if zlib couldn't
763 be imported. The function is cached when found, so subsequent calls
764 don't import zlib again. Returns a *borrowed* reference.
765 XXX This makes zlib.decompress immortal. */
766static PyObject *
767get_decompress_func(void)
768{
769 static PyObject *decompress = NULL;
770
771 if (decompress == NULL) {
772 PyObject *zlib;
773 static int importing_zlib = 0;
774
775 if (importing_zlib != 0)
776 /* Someone has a zlib.py[co] in their Zip file;
777 let's avoid a stack overflow. */
778 return NULL;
779 importing_zlib = 1;
780 zlib = PyImport_ImportModule("zlib"); /* import zlib */
781 importing_zlib = 0;
782 if (zlib != NULL) {
783 decompress = PyObject_GetAttrString(zlib,
784 "decompress");
785 Py_DECREF(zlib);
786 }
787 else
788 PyErr_Clear();
789 if (Py_VerboseFlag)
790 PySys_WriteStderr("# zipimport: zlib %s\n",
791 zlib != NULL ? "available": "UNAVAILABLE");
792 }
793 return decompress;
794}
795
796/* Given a path to a Zip file and a toc_entry, return the (uncompressed)
797 data as a new reference. */
798static PyObject *
799get_data(char *archive, PyObject *toc_entry)
800{
801 PyObject *raw_data, *data = NULL, *decompress;
802 char *buf;
803 FILE *fp;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000804 int err;
805 Py_ssize_t bytes_read = 0;
Just van Rossum52e14d62002-12-30 22:08:05 +0000806 long l;
807 char *datapath;
808 long compress, data_size, file_size, file_offset;
809 long time, date, crc;
810
Neal Norwitz0c0aad92003-02-18 03:37:49 +0000811 if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress,
Just van Rossum52e14d62002-12-30 22:08:05 +0000812 &data_size, &file_size, &file_offset, &time,
813 &date, &crc)) {
814 return NULL;
815 }
816
817 fp = fopen(archive, "rb");
818 if (!fp) {
819 PyErr_Format(PyExc_IOError,
820 "zipimport: can not open file %s", archive);
821 return NULL;
822 }
823
824 /* Check to make sure the local file header is correct */
825 fseek(fp, file_offset, 0);
826 l = PyMarshal_ReadLongFromFile(fp);
827 if (l != 0x04034B50) {
828 /* Bad: Local File Header */
829 PyErr_Format(ZipImportError,
830 "bad local file header in %s",
831 archive);
832 fclose(fp);
833 return NULL;
834 }
835 fseek(fp, file_offset + 26, 0);
836 l = 30 + PyMarshal_ReadShortFromFile(fp) +
837 PyMarshal_ReadShortFromFile(fp); /* local header size */
838 file_offset += l; /* Start of file data */
839
840 raw_data = PyString_FromStringAndSize((char *)NULL, compress == 0 ?
841 data_size : data_size + 1);
842 if (raw_data == NULL) {
843 fclose(fp);
844 return NULL;
845 }
846 buf = PyString_AsString(raw_data);
847
848 err = fseek(fp, file_offset, 0);
849 if (err == 0)
850 bytes_read = fread(buf, 1, data_size, fp);
851 fclose(fp);
852 if (err || bytes_read != data_size) {
853 PyErr_SetString(PyExc_IOError,
854 "zipimport: can't read data");
855 Py_DECREF(raw_data);
856 return NULL;
857 }
858
859 if (compress != 0) {
860 buf[data_size] = 'Z'; /* saw this in zipfile.py */
861 data_size++;
862 }
863 buf[data_size] = '\0';
864
865 if (compress == 0) /* data is not compressed */
866 return raw_data;
867
868 /* Decompress with zlib */
869 decompress = get_decompress_func();
870 if (decompress == NULL) {
871 PyErr_SetString(ZipImportError,
872 "can't decompress data; "
873 "zlib not available");
874 goto error;
875 }
Just van Rossumee8f10f2003-09-07 13:36:48 +0000876 data = PyObject_CallFunction(decompress, "Oi", raw_data, -15);
Just van Rossum52e14d62002-12-30 22:08:05 +0000877error:
878 Py_DECREF(raw_data);
879 return data;
880}
881
/* Lenient date/time comparison.  The mtime kept in the archive is less
   precise than the one stored in a .pyc (dostime only stores even
   seconds), so two stamps count as equal when they differ by at most
   one second.  Returns non-zero if they match. */
static int
eq_mtime(time_t t1, time_t t2)
{
    time_t diff = t1 - t2;

    return ((diff < 0) ? -diff : diff) <= 1;
}
894
895/* Given the contents of a .py[co] file in a buffer, unmarshal the data
896 and return the code object. Return None if it the magic word doesn't
897 match (we do this instead of raising an exception as we fall back
898 to .py if available and we don't want to mask other errors).
899 Returns a new reference. */
900static PyObject *
901unmarshal_code(char *pathname, PyObject *data, time_t mtime)
902{
903 PyObject *code;
904 char *buf = PyString_AsString(data);
Martin v. Löwisad0a4622006-02-16 14:30:23 +0000905 Py_ssize_t size = PyString_Size(data);
Just van Rossum52e14d62002-12-30 22:08:05 +0000906
907 if (size <= 9) {
908 PyErr_SetString(ZipImportError,
909 "bad pyc data");
910 return NULL;
911 }
912
Jack Jansen5eaeaf92002-12-30 23:06:14 +0000913 if (get_long((unsigned char *)buf) != PyImport_GetMagicNumber()) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000914 if (Py_VerboseFlag)
915 PySys_WriteStderr("# %s has bad magic\n",
916 pathname);
917 Py_INCREF(Py_None);
918 return Py_None; /* signal caller to try alternative */
919 }
920
Just van Rossum9a3129c2003-01-03 11:18:56 +0000921 if (mtime != 0 && !eq_mtime(get_long((unsigned char *)buf + 4),
922 mtime)) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000923 if (Py_VerboseFlag)
924 PySys_WriteStderr("# %s has bad mtime\n",
925 pathname);
926 Py_INCREF(Py_None);
927 return Py_None; /* signal caller to try alternative */
928 }
929
930 code = PyMarshal_ReadObjectFromString(buf + 8, size - 8);
931 if (code == NULL)
932 return NULL;
933 if (!PyCode_Check(code)) {
934 Py_DECREF(code);
935 PyErr_Format(PyExc_TypeError,
936 "compiled module %.200s is not a code object",
937 pathname);
938 return NULL;
939 }
940 return code;
941}
942
943/* Replace any occurances of "\r\n?" in the input string with "\n".
944 This converts DOS and Mac line endings to Unix line endings.
945 Also append a trailing "\n" to be compatible with
946 PyParser_SimpleParseFile(). Returns a new reference. */
947static PyObject *
948normalize_line_endings(PyObject *source)
949{
Just van Rossum9a3129c2003-01-03 11:18:56 +0000950 char *buf, *q, *p = PyString_AsString(source);
Just van Rossum52e14d62002-12-30 22:08:05 +0000951 PyObject *fixed_source;
952
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000953 if (!p)
954 return NULL;
955
Just van Rossum9a3129c2003-01-03 11:18:56 +0000956 /* one char extra for trailing \n and one for terminating \0 */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000957 buf = (char *)PyMem_Malloc(PyString_Size(source) + 2);
Just van Rossum9a3129c2003-01-03 11:18:56 +0000958 if (buf == NULL) {
959 PyErr_SetString(PyExc_MemoryError,
960 "zipimport: no memory to allocate "
961 "source buffer");
Just van Rossum52e14d62002-12-30 22:08:05 +0000962 return NULL;
Just van Rossum9a3129c2003-01-03 11:18:56 +0000963 }
Just van Rossum52e14d62002-12-30 22:08:05 +0000964 /* replace "\r\n?" by "\n" */
Neal Norwitz5c1ba532003-02-17 18:05:20 +0000965 for (q = buf; *p != '\0'; p++) {
Just van Rossum52e14d62002-12-30 22:08:05 +0000966 if (*p == '\r') {
967 *q++ = '\n';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000968 if (*(p + 1) == '\n')
Just van Rossum52e14d62002-12-30 22:08:05 +0000969 p++;
Just van Rossum52e14d62002-12-30 22:08:05 +0000970 }
971 else
972 *q++ = *p;
Just van Rossum52e14d62002-12-30 22:08:05 +0000973 }
974 *q++ = '\n'; /* add trailing \n */
975 *q = '\0';
Just van Rossum9a3129c2003-01-03 11:18:56 +0000976 fixed_source = PyString_FromString(buf);
977 PyMem_Free(buf);
Just van Rossum52e14d62002-12-30 22:08:05 +0000978 return fixed_source;
979}
980
981/* Given a string buffer containing Python source code, compile it
982 return and return a code object as a new reference. */
983static PyObject *
984compile_source(char *pathname, PyObject *source)
985{
986 PyObject *code, *fixed_source;
987
988 fixed_source = normalize_line_endings(source);
989 if (fixed_source == NULL)
990 return NULL;
991
992 code = Py_CompileString(PyString_AsString(fixed_source), pathname,
993 Py_file_input);
994 Py_DECREF(fixed_source);
995 return code;
996}
997
/* Convert the date/time values found in the Zip archive to a value
   that's compatible with the time stamp stored in .pyc files.  The
   bit fields of both words are unpacked into a struct tm, which is
   then converted with mktime(). */
static time_t
parse_dostime(int dostime, int dosdate)
{
    struct tm fields;

    /* date word: year (offset by 80 for tm_year), month, day */
    fields.tm_year = ((dosdate >> 9) & 0x7f) + 80;
    fields.tm_mon  = ((dosdate >> 5) & 0x0f) - 1;
    fields.tm_mday = dosdate & 0x1f;

    /* time word: hour, minute, seconds stored in units of two */
    fields.tm_hour = (dostime >> 11) & 0x1f;
    fields.tm_min  = (dostime >> 5) & 0x3f;
    fields.tm_sec  = (dostime & 0x1f) * 2;

    fields.tm_isdst = -1; /* wday/yday is ignored */

    return mktime(&fields);
}
1015
1016/* Given a path to a .pyc or .pyo file in the archive, return the
1017 modifictaion time of the matching .py file, or 0 if no source
1018 is available. */
1019static time_t
1020get_mtime_of_source(ZipImporter *self, char *path)
1021{
1022 PyObject *toc_entry;
1023 time_t mtime = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001024 Py_ssize_t lastchar = strlen(path) - 1;
Just van Rossum52e14d62002-12-30 22:08:05 +00001025 char savechar = path[lastchar];
1026 path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */
1027 toc_entry = PyDict_GetItemString(self->files, path);
1028 if (toc_entry != NULL && PyTuple_Check(toc_entry) &&
1029 PyTuple_Size(toc_entry) == 8) {
Just van Rossumf8b6de12002-12-31 09:51:59 +00001030 /* fetch the time stamp of the .py file for comparison
1031 with an embedded pyc time stamp */
1032 int time, date;
Just van Rossum52e14d62002-12-30 22:08:05 +00001033 time = PyInt_AsLong(PyTuple_GetItem(toc_entry, 5));
1034 date = PyInt_AsLong(PyTuple_GetItem(toc_entry, 6));
1035 mtime = parse_dostime(time, date);
1036 }
1037 path[lastchar] = savechar;
1038 return mtime;
1039}
1040
1041/* Return the code object for the module named by 'fullname' from the
1042 Zip archive as a new reference. */
1043static PyObject *
1044get_code_from_data(ZipImporter *self, int ispackage, int isbytecode,
1045 time_t mtime, PyObject *toc_entry)
1046{
1047 PyObject *data, *code;
1048 char *modpath;
1049 char *archive = PyString_AsString(self->archive);
1050
1051 if (archive == NULL)
1052 return NULL;
1053
1054 data = get_data(archive, toc_entry);
1055 if (data == NULL)
1056 return NULL;
1057
1058 modpath = PyString_AsString(PyTuple_GetItem(toc_entry, 0));
1059
1060 if (isbytecode) {
1061 code = unmarshal_code(modpath, data, mtime);
1062 }
1063 else {
1064 code = compile_source(modpath, data);
1065 }
1066 Py_DECREF(data);
1067 return code;
1068}
1069
1070/* Get the code object assoiciated with the module specified by
1071 'fullname'. */
1072static PyObject *
1073get_module_code(ZipImporter *self, char *fullname,
1074 int *p_ispackage, char **p_modpath)
1075{
1076 PyObject *toc_entry;
1077 char *subname, path[MAXPATHLEN + 1];
1078 int len;
1079 struct st_zip_searchorder *zso;
1080
1081 subname = get_subname(fullname);
1082
1083 len = make_filename(PyString_AsString(self->prefix), subname, path);
1084 if (len < 0)
1085 return NULL;
1086
1087 for (zso = zip_searchorder; *zso->suffix; zso++) {
1088 PyObject *code = NULL;
1089
1090 strcpy(path + len, zso->suffix);
1091 if (Py_VerboseFlag > 1)
1092 PySys_WriteStderr("# trying %s%c%s\n",
1093 PyString_AsString(self->archive),
1094 SEP, path);
1095 toc_entry = PyDict_GetItemString(self->files, path);
1096 if (toc_entry != NULL) {
1097 time_t mtime = 0;
1098 int ispackage = zso->type & IS_PACKAGE;
1099 int isbytecode = zso->type & IS_BYTECODE;
1100
1101 if (isbytecode)
1102 mtime = get_mtime_of_source(self, path);
1103 if (p_ispackage != NULL)
1104 *p_ispackage = ispackage;
1105 code = get_code_from_data(self, ispackage,
1106 isbytecode, mtime,
1107 toc_entry);
1108 if (code == Py_None) {
1109 /* bad magic number or non-matching mtime
1110 in byte code, try next */
1111 Py_DECREF(code);
1112 continue;
1113 }
1114 if (code != NULL && p_modpath != NULL)
1115 *p_modpath = PyString_AsString(
1116 PyTuple_GetItem(toc_entry, 0));
1117 return code;
1118 }
1119 }
1120 PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname);
1121 return NULL;
1122}
1123
1124
1125/* Module init */
1126
/* Module-level docstring; initzipimport() passes it to Py_InitModule4(). */
PyDoc_STRVAR(zipimport_doc,
"zipimport provides support for importing Python modules from Zip archives.\n\
\n\
This module exports three objects:\n\
- zipimporter: a class; its constructor takes a path to a Zip archive.\n\
- ZipImportError: exception raised by zipimporter objects. It's a\n\
 subclass of ImportError, so it can be caught as ImportError, too.\n\
- _zip_directory_cache: a dict, mapping archive paths to zip directory\n\
 info dicts, as used in zipimporter._files.\n\
\n\
It is usually not needed to use the zipimport module explicitly; it is\n\
used by the builtin import mechanism for sys.path items that are paths\n\
to Zip archives.");
1140
1141PyMODINIT_FUNC
1142initzipimport(void)
1143{
1144 PyObject *mod;
1145
1146 if (PyType_Ready(&ZipImporter_Type) < 0)
1147 return;
1148
1149 /* Correct directory separator */
1150 zip_searchorder[0].suffix[0] = SEP;
1151 zip_searchorder[1].suffix[0] = SEP;
1152 zip_searchorder[2].suffix[0] = SEP;
1153 if (Py_OptimizeFlag) {
1154 /* Reverse *.pyc and *.pyo */
1155 struct st_zip_searchorder tmp;
1156 tmp = zip_searchorder[0];
1157 zip_searchorder[0] = zip_searchorder[1];
1158 zip_searchorder[1] = tmp;
1159 tmp = zip_searchorder[3];
1160 zip_searchorder[3] = zip_searchorder[4];
1161 zip_searchorder[4] = tmp;
1162 }
1163
1164 mod = Py_InitModule4("zipimport", NULL, zipimport_doc,
1165 NULL, PYTHON_API_VERSION);
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00001166 if (mod == NULL)
1167 return;
Just van Rossum52e14d62002-12-30 22:08:05 +00001168
1169 ZipImportError = PyErr_NewException("zipimport.ZipImportError",
1170 PyExc_ImportError, NULL);
1171 if (ZipImportError == NULL)
1172 return;
1173
1174 Py_INCREF(ZipImportError);
1175 if (PyModule_AddObject(mod, "ZipImportError",
1176 ZipImportError) < 0)
1177 return;
1178
1179 Py_INCREF(&ZipImporter_Type);
1180 if (PyModule_AddObject(mod, "zipimporter",
1181 (PyObject *)&ZipImporter_Type) < 0)
1182 return;
Just van Rossumf8b6de12002-12-31 09:51:59 +00001183
Just van Rossum52e14d62002-12-30 22:08:05 +00001184 zip_directory_cache = PyDict_New();
1185 if (zip_directory_cache == NULL)
1186 return;
1187 Py_INCREF(zip_directory_cache);
1188 if (PyModule_AddObject(mod, "_zip_directory_cache",
1189 zip_directory_cache) < 0)
1190 return;
1191}