| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1 | """ | 
 | 2 | Read and write ZIP files. | 
 | 3 | """ | 
| Georg Brandl | 62416bc | 2008-01-07 18:47:44 +0000 | [diff] [blame] | 4 | import struct, os, time, sys, shutil | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 5 | import binascii, cStringIO, stat | 
| Antoine Pitrou | 94c33eb | 2010-01-27 20:59:50 +0000 | [diff] [blame] | 6 | import io | 
 | 7 | import re | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 8 |  | 
 | 9 | try: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 10 |     import zlib # We may need its compression method | 
| Gregory P. Smith | b89a096 | 2008-03-19 01:46:10 +0000 | [diff] [blame] | 11 |     crc32 = zlib.crc32 | 
| Guido van Rossum | 9c673f3 | 2001-04-10 15:37:12 +0000 | [diff] [blame] | 12 | except ImportError: | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 13 |     zlib = None | 
| Gregory P. Smith | b89a096 | 2008-03-19 01:46:10 +0000 | [diff] [blame] | 14 |     crc32 = binascii.crc32 | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 15 |  | 
| Skip Montanaro | 40fc160 | 2001-03-01 04:27:19 +0000 | [diff] [blame] | 16 | __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 17 |            "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ] | 
| Skip Montanaro | 40fc160 | 2001-03-01 04:27:19 +0000 | [diff] [blame] | 18 |  | 
class BadZipfile(Exception):
    """
    Raised for archives this module cannot process, e.g. ones that span
    multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions while
    those extensions are disabled.
    """


# Module-level alias: the exception raised by this module
error = BadZipfile
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 30 |  | 
# Numeric limits used by this module
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each group of index constants below numbers, in order, the fields produced
# by struct.unpack() with the matching format string.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,            # 4s
 _ECD_DISK_NUMBER,          # H
 _ECD_DISK_START,           # H
 _ECD_ENTRIES_THIS_DISK,    # H
 _ECD_ENTRIES_TOTAL,        # H
 _ECD_SIZE,                 # L
 _ECD_OFFSET,               # L
 _ECD_COMMENT_SIZE,         # H
 # These last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,                 # 4s
 _CD_CREATE_VERSION,            # B
 _CD_CREATE_SYSTEM,             # B
 _CD_EXTRACT_VERSION,           # B
 _CD_EXTRACT_SYSTEM,            # B
 _CD_FLAG_BITS,                 # H
 _CD_COMPRESS_TYPE,             # H
 _CD_TIME,                      # H
 _CD_DATE,                      # H
 _CD_CRC,                       # L
 _CD_COMPRESSED_SIZE,           # L
 _CD_UNCOMPRESSED_SIZE,         # L
 _CD_FILENAME_LENGTH,           # H
 _CD_EXTRA_FIELD_LENGTH,        # H
 _CD_COMMENT_LENGTH,            # H
 _CD_DISK_NUMBER_START,         # H
 _CD_INTERNAL_FILE_ATTRIBUTES,  # H
 _CD_EXTERNAL_FILE_ATTRIBUTES,  # L
 _CD_LOCAL_HEADER_OFFSET) = range(19)   # L

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,                 # 4s
 _FH_EXTRACT_VERSION,           # B
 _FH_EXTRACT_SYSTEM,            # B
 _FH_GENERAL_PURPOSE_FLAG_BITS, # H
 _FH_COMPRESSION_METHOD,        # H
 _FH_LAST_MOD_TIME,             # H
 _FH_LAST_MOD_DATE,             # H
 _FH_CRC,                       # L
 _FH_COMPRESSED_SIZE,           # L
 _FH_UNCOMPRESSED_SIZE,         # L
 _FH_FILENAME_LENGTH,           # H
 _FH_EXTRA_FIELD_LENGTH) = range(12)    # H

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,                   # 4s
 _CD64_DIRECTORY_RECSIZE,           # Q
 _CD64_CREATE_VERSION,              # H
 _CD64_EXTRACT_VERSION,             # H
 _CD64_DISK_NUMBER,                 # L
 _CD64_DISK_NUMBER_START,           # L
 _CD64_NUMBER_ENTRIES_THIS_DISK,    # Q
 _CD64_NUMBER_ENTRIES_TOTAL,        # Q
 _CD64_DIRECTORY_SIZE,              # Q
 _CD64_OFFSET_START_CENTDIR) = range(10)    # Q
 | 132 |  | 
def _check_zipfile(fp):
    """Return True if an "end of central directory" record is found in fp.

    An IOError raised while looking for the record is treated as
    "not a ZIP file" and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number
    return True if end_record else False
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 140 |  | 
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too; anything
    with a "read" attribute is checked directly instead of being opened.
    An IOError while opening or reading the file yields False.
    """
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it is closed again
            with open(filename, "rb") as handle:
                return _check_zipfile(handle)
        return _check_zipfile(fp=filename)
    except IOError:
        return False
 | 156 |  | 
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is a seekable file object.  offset is the position of the classic
    "end of central directory" record relative to the end of the file (the
    callers in this module pass a negative number).  endrec is the list
    built by _EndRecData().

    endrec is returned unchanged when no ZIP64 locator or ZIP64 record is
    found, including when the file is too short to hold one.  Otherwise
    its first seven items are overwritten in place with the values from
    the ZIP64 record and the same list is returned.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no complete locator to unpack.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Short read: there is no complete ZIP64 record to unpack.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
 | 194 |  | 
 | 195 |  | 
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    fpin must be a seekable file object.  None is returned when the file
    is too small to hold the record or when no valid record is found.
    The list is passed through _EndRecData64() before being returned, so
    its leading items may come from a ZIP64 record instead.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature is too close to the end of the file for a
            # complete record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
| Martin v. Löwis | 6f6873b | 2002-10-13 13:54:50 +0000 | [diff] [blame] | 252 |  | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 253 |  | 
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename with the given modification time.

        filename is stored unchanged in orig_filename; the filename
        attribute holds a normalized copy (cut at the first null byte,
        os.sep replaced by "/").  date_time is a
        (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the
        encoded file name and the extra field.  When either size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended, the 32-bit size
        fields are set to 0xffffffff, and extract_version and
        create_version are each raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version on its own value, so a higher
            # create_version is not replaced by extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        A unicode file name is encoded as ASCII when possible; otherwise
        it is encoded as UTF-8 and bit 0x800 is set in the returned
        flags.  Any other file name is returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this object.

        The extra field is walked as a sequence of (type, length, data)
        records.  For a record of type 1, file_size, compress_size and
        header_offset are replaced, in that order, by the 64-bit values
        in the record whenever their current value is the 0xffffffff
        placeholder.  Fewer than four trailing bytes cannot form a record
        header and are ignored.

        Raises RuntimeError if a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header is 4 bytes; stop once less than that remains.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 401 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 402 |  | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 403 | class _ZipDecrypter: | 
 | 404 |     """Class to handle decryption of files stored within a ZIP archive. | 
 | 405 |  | 
 | 406 |     ZIP supports a password-based form of encryption. Even though known | 
 | 407 |     plaintext attacks have been found against it, it is still useful | 
| Gregory P. Smith | da40723 | 2008-01-20 01:32:00 +0000 | [diff] [blame] | 408 |     to be able to get data out of such a file. | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 409 |  | 
 | 410 |     Usage: | 
 | 411 |         zd = _ZipDecrypter(mypwd) | 
 | 412 |         plain_char = zd(cypher_char) | 
 | 413 |         plain_text = map(zd, cypher_text) | 
 | 414 |     """ | 
 | 415 |  | 
 | 416 |     def _GenerateCRCTable(): | 
 | 417 |         """Generate a CRC-32 table. | 
 | 418 |  | 
 | 419 |         ZIP encryption uses the CRC32 one-byte primitive for scrambling some | 
 | 420 |         internal keys. We noticed that a direct implementation is faster than | 
 | 421 |         relying on binascii.crc32(). | 
 | 422 |         """ | 
 | 423 |         poly = 0xedb88320 | 
 | 424 |         table = [0] * 256 | 
 | 425 |         for i in range(256): | 
 | 426 |             crc = i | 
 | 427 |             for j in range(8): | 
 | 428 |                 if crc & 1: | 
 | 429 |                     crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly | 
 | 430 |                 else: | 
 | 431 |                     crc = ((crc >> 1) & 0x7FFFFFFF) | 
 | 432 |             table[i] = crc | 
 | 433 |         return table | 
 | 434 |     crctable = _GenerateCRCTable() | 
 | 435 |  | 
 | 436 |     def _crc32(self, ch, crc): | 
 | 437 |         """Compute the CRC32 primitive on one byte.""" | 
 | 438 |         return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] | 
 | 439 |  | 
 | 440 |     def __init__(self, pwd): | 
 | 441 |         self.key0 = 305419896 | 
 | 442 |         self.key1 = 591751049 | 
 | 443 |         self.key2 = 878082192 | 
 | 444 |         for p in pwd: | 
 | 445 |             self._UpdateKeys(p) | 
 | 446 |  | 
 | 447 |     def _UpdateKeys(self, c): | 
 | 448 |         self.key0 = self._crc32(c, self.key0) | 
 | 449 |         self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 | 
 | 450 |         self.key1 = (self.key1 * 134775813 + 1) & 4294967295 | 
 | 451 |         self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) | 
 | 452 |  | 
 | 453 |     def __call__(self, c): | 
 | 454 |         """Decrypt a single character.""" | 
 | 455 |         c = ord(c) | 
 | 456 |         k = self.key2 | 2 | 
 | 457 |         c = c ^ (((k * (k^1)) >> 8) & 255) | 
 | 458 |         c = chr(c) | 
 | 459 |         self._UpdateKeys(c) | 
 | 460 |         return c | 
 | 461 |  | 
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses are required:
    # "-" binds tighter than "<<", so "1 << 31 - 1" evaluates to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap fileobj to read the member described by zipinfo.

        fileobj   -- object whose read() supplies the member's stored bytes.
        mode      -- open mode; a 'U' in it enables universal newlines in
                     readline().
        zipinfo   -- supplies compress_type, compress_size, filename and,
                     when present, the expected CRC.
        decrypter -- optional callable applied to every byte read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        # A decompressor is only created for deflated members; negative
        # wbits selects a raw deflate stream.
        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        # The CRC is only tracked when a reference value is available.
        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        In universal newline mode every line ending is returned as '\\n'
        and the endings seen are recorded in self.newlines.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then rewind over what it
            # consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            # An empty result from read1() means no more data.
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it when eof is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n or None means "as much as possible" (MAX_N).
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                # Drop the consumed prefix and append the new data.
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            # End of member: nothing left to read and nothing left to inflate.
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
 | 649 |  | 
| Tim Peters | ea5962f | 2007-03-12 18:07:52 +0000 | [diff] [blame] | 650 |  | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 651 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 652 | class ZipFile: | 
| Tim Peters | a19a168 | 2001-03-29 04:36:09 +0000 | [diff] [blame] | 653 |     """ Class with methods to open, read, write, close, list zip files. | 
 | 654 |  | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 655 |     z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) | 
| Tim Peters | a19a168 | 2001-03-29 04:36:09 +0000 | [diff] [blame] | 656 |  | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 657 |     file: Either the path to the file, or a file-like object. | 
 | 658 |           If it is a path, the file will be opened and closed by ZipFile. | 
 | 659 |     mode: The mode can be either read "r", write "w" or append "a". | 
 | 660 |     compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 661 |     allowZip64: if True ZipFile will create files with ZIP64 extensions when | 
 | 662 |                 needed, otherwise it will raise an exception when this would | 
 | 663 |                 be necessary. | 
 | 664 |  | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 665 |     """ | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 666 |  | 
| Fred Drake | 90eac28 | 2001-02-28 05:29:34 +0000 | [diff] [blame] | 667 |     fp = None                   # Set here since __del__ checks it | 
 | 668 |  | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 669 |     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 670 |         """Open the ZIP file with mode read "r", write "w" or append "a".""" | 
| Georg Brandl | 4b3ab6f | 2007-07-12 09:59:22 +0000 | [diff] [blame] | 671 |         if mode not in ("r", "w", "a"): | 
 | 672 |             raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') | 
 | 673 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 674 |         if compression == ZIP_STORED: | 
 | 675 |             pass | 
 | 676 |         elif compression == ZIP_DEFLATED: | 
 | 677 |             if not zlib: | 
 | 678 |                 raise RuntimeError,\ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 679 |                       "Compression requires the (missing) zlib module" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 680 |         else: | 
 | 681 |             raise RuntimeError, "That compression method is not supported" | 
| Georg Brandl | 4b3ab6f | 2007-07-12 09:59:22 +0000 | [diff] [blame] | 682 |  | 
 | 683 |         self._allowZip64 = allowZip64 | 
 | 684 |         self._didModify = False | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 685 |         self.debug = 0  # Level of printing: 0 through 3 | 
 | 686 |         self.NameToInfo = {}    # Find file info given name | 
 | 687 |         self.filelist = []      # List of ZipInfo instances for archive | 
 | 688 |         self.compression = compression  # Method of compression | 
| Raymond Hettinger | 2ca7c19 | 2005-02-16 09:27:49 +0000 | [diff] [blame] | 689 |         self.mode = key = mode.replace('b', '')[0] | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 690 |         self.pwd = None | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 691 |         self.comment = '' | 
| Tim Peters | a19a168 | 2001-03-29 04:36:09 +0000 | [diff] [blame] | 692 |  | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 693 |         # Check if we were passed a file-like object | 
| Walter Dörwald | 65230a2 | 2002-06-03 15:58:32 +0000 | [diff] [blame] | 694 |         if isinstance(file, basestring): | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 695 |             self._filePassed = 0 | 
 | 696 |             self.filename = file | 
 | 697 |             modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} | 
| Martin v. Löwis | 84f6de9 | 2007-02-13 10:10:39 +0000 | [diff] [blame] | 698 |             try: | 
 | 699 |                 self.fp = open(file, modeDict[mode]) | 
 | 700 |             except IOError: | 
 | 701 |                 if mode == 'a': | 
 | 702 |                     mode = key = 'w' | 
 | 703 |                     self.fp = open(file, modeDict[mode]) | 
 | 704 |                 else: | 
 | 705 |                     raise | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 706 |         else: | 
 | 707 |             self._filePassed = 1 | 
 | 708 |             self.fp = file | 
 | 709 |             self.filename = getattr(file, 'name', None) | 
| Tim Peters | a19a168 | 2001-03-29 04:36:09 +0000 | [diff] [blame] | 710 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 711 |         if key == 'r': | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 712 |             self._GetContents() | 
 | 713 |         elif key == 'w': | 
| Georg Brandl | 86e0c89 | 2010-11-26 07:22:28 +0000 | [diff] [blame^] | 714 |             # set the modified flag so central directory gets written | 
 | 715 |             # even if no files are added to the archive | 
 | 716 |             self._didModify = True | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 717 |         elif key == 'a': | 
| Georg Brandl | 86e0c89 | 2010-11-26 07:22:28 +0000 | [diff] [blame^] | 718 |             try: | 
 | 719 |                 # See if file is a zip file | 
| Martin v. Löwis | 6f6873b | 2002-10-13 13:54:50 +0000 | [diff] [blame] | 720 |                 self._RealGetContents() | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 721 |                 # seek to start of directory and overwrite | 
| Martin v. Löwis | 6f6873b | 2002-10-13 13:54:50 +0000 | [diff] [blame] | 722 |                 self.fp.seek(self.start_dir, 0) | 
| Georg Brandl | 86e0c89 | 2010-11-26 07:22:28 +0000 | [diff] [blame^] | 723 |             except BadZipfile: | 
 | 724 |                 # file is not a zip file, just append | 
| Martin v. Löwis | 6f6873b | 2002-10-13 13:54:50 +0000 | [diff] [blame] | 725 |                 self.fp.seek(0, 2) | 
| Georg Brandl | 86e0c89 | 2010-11-26 07:22:28 +0000 | [diff] [blame^] | 726 |  | 
 | 727 |                 # set the modified flag so central directory gets written | 
 | 728 |                 # even if no files are added to the archive | 
 | 729 |                 self._didModify = True | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 730 |         else: | 
| Tim Peters | 7d3bad6 | 2001-04-04 18:56:49 +0000 | [diff] [blame] | 731 |             if not self._filePassed: | 
 | 732 |                 self.fp.close() | 
 | 733 |                 self.fp = None | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 734 |             raise RuntimeError, 'Mode must be "r", "w" or "a"' | 
 | 735 |  | 
| Ezio Melotti | 569e61f | 2009-12-30 06:14:51 +0000 | [diff] [blame] | 736 |     def __enter__(self): | 
 | 737 |         return self | 
 | 738 |  | 
 | 739 |     def __exit__(self, type, value, traceback): | 
 | 740 |         self.close() | 
 | 741 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 742 |     def _GetContents(self): | 
| Tim Peters | 7d3bad6 | 2001-04-04 18:56:49 +0000 | [diff] [blame] | 743 |         """Read the directory, making sure we close the file if the format | 
 | 744 |         is bad.""" | 
 | 745 |         try: | 
 | 746 |             self._RealGetContents() | 
 | 747 |         except BadZipfile: | 
 | 748 |             if not self._filePassed: | 
 | 749 |                 self.fp.close() | 
 | 750 |                 self.fp = None | 
 | 751 |             raise | 
 | 752 |  | 
 | 753 |     def _RealGetContents(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 754 |         """Read in the table of contents for the ZIP file.""" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 755 |         fp = self.fp | 
| Georg Brandl | 86e0c89 | 2010-11-26 07:22:28 +0000 | [diff] [blame^] | 756 |         try: | 
 | 757 |             endrec = _EndRecData(fp) | 
 | 758 |         except IOError: | 
 | 759 |             raise BadZipfile("File is not a zip file") | 
| Martin v. Löwis | 6f6873b | 2002-10-13 13:54:50 +0000 | [diff] [blame] | 760 |         if not endrec: | 
 | 761 |             raise BadZipfile, "File is not a zip file" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 762 |         if self.debug > 1: | 
 | 763 |             print endrec | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 764 |         size_cd = endrec[_ECD_SIZE]             # bytes in central directory | 
 | 765 |         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory | 
 | 766 |         self.comment = endrec[_ECD_COMMENT]     # archive comment | 
 | 767 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 768 |         # "concat" is zero, unless zip was concatenated to another file | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 769 |         concat = endrec[_ECD_LOCATION] - size_cd - offset_cd | 
| Antoine Pitrou | ebcd0ce | 2008-09-05 23:30:23 +0000 | [diff] [blame] | 770 |         if endrec[_ECD_SIGNATURE] == stringEndArchive64: | 
 | 771 |             # If Zip64 extension structures are present, account for them | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 772 |             concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) | 
 | 773 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 774 |         if self.debug > 2: | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 775 |             inferred = concat + offset_cd | 
 | 776 |             print "given, inferred, offset", offset_cd, inferred, concat | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 777 |         # self.start_dir:  Position of start of central directory | 
 | 778 |         self.start_dir = offset_cd + concat | 
 | 779 |         fp.seek(self.start_dir, 0) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 780 |         data = fp.read(size_cd) | 
 | 781 |         fp = cStringIO.StringIO(data) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 782 |         total = 0 | 
 | 783 |         while total < size_cd: | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 784 |             centdir = fp.read(sizeCentralDir) | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 785 |             if centdir[0:4] != stringCentralDir: | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 786 |                 raise BadZipfile, "Bad magic number for central directory" | 
 | 787 |             centdir = struct.unpack(structCentralDir, centdir) | 
 | 788 |             if self.debug > 2: | 
 | 789 |                 print centdir | 
| Fred Drake | 3e038e5 | 2001-02-28 17:56:26 +0000 | [diff] [blame] | 790 |             filename = fp.read(centdir[_CD_FILENAME_LENGTH]) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 791 |             # Create ZipInfo instance to store file information | 
 | 792 |             x = ZipInfo(filename) | 
| Fred Drake | 3e038e5 | 2001-02-28 17:56:26 +0000 | [diff] [blame] | 793 |             x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) | 
 | 794 |             x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 795 |             x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 796 |             (x.create_version, x.create_system, x.extract_version, x.reserved, | 
 | 797 |                 x.flag_bits, x.compress_type, t, d, | 
 | 798 |                 x.CRC, x.compress_size, x.file_size) = centdir[1:12] | 
 | 799 |             x.volume, x.internal_attr, x.external_attr = centdir[15:18] | 
 | 800 |             # Convert date/time code to (year, month, day, hour, min, sec) | 
| Gregory P. Smith | 0c63fc2 | 2008-01-20 01:21:03 +0000 | [diff] [blame] | 801 |             x._raw_time = t | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 802 |             x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, | 
| Fred Drake | 414ca66 | 2000-06-13 18:49:53 +0000 | [diff] [blame] | 803 |                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 804 |  | 
 | 805 |             x._decodeExtra() | 
 | 806 |             x.header_offset = x.header_offset + concat | 
| Martin v. Löwis | 471617d | 2008-05-05 17:16:58 +0000 | [diff] [blame] | 807 |             x.filename = x._decodeFilename() | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 808 |             self.filelist.append(x) | 
 | 809 |             self.NameToInfo[x.filename] = x | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 810 |  | 
 | 811 |             # update total bytes read from central directory | 
 | 812 |             total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] | 
 | 813 |                      + centdir[_CD_EXTRA_FIELD_LENGTH] | 
 | 814 |                      + centdir[_CD_COMMENT_LENGTH]) | 
 | 815 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 816 |             if self.debug > 2: | 
 | 817 |                 print "total", total | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 818 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 819 |  | 
 | 820 |     def namelist(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 821 |         """Return a list of file names in the archive.""" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 822 |         l = [] | 
 | 823 |         for data in self.filelist: | 
 | 824 |             l.append(data.filename) | 
 | 825 |         return l | 
 | 826 |  | 
 | 827 |     def infolist(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 828 |         """Return a list of class ZipInfo instances for files in the | 
 | 829 |         archive.""" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 830 |         return self.filelist | 
 | 831 |  | 
 | 832 |     def printdir(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 833 |         """Print a table of contents for the zip file.""" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 834 |         print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size") | 
 | 835 |         for zinfo in self.filelist: | 
| Raymond Hettinger | 351e1a3 | 2008-01-14 22:58:05 +0000 | [diff] [blame] | 836 |             date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 837 |             print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size) | 
 | 838 |  | 
 | 839 |     def testzip(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 840 |         """Read all the files and check the CRC.""" | 
| Antoine Pitrou | c534270 | 2008-08-17 13:06:29 +0000 | [diff] [blame] | 841 |         chunk_size = 2 ** 20 | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 842 |         for zinfo in self.filelist: | 
 | 843 |             try: | 
| Antoine Pitrou | c534270 | 2008-08-17 13:06:29 +0000 | [diff] [blame] | 844 |                 # Read by chunks, to avoid an OverflowError or a | 
 | 845 |                 # MemoryError with very large embedded files. | 
 | 846 |                 f = self.open(zinfo.filename, "r") | 
 | 847 |                 while f.read(chunk_size):     # Check CRC-32 | 
 | 848 |                     pass | 
| Raymond Hettinger | c0fac96 | 2003-06-27 22:25:03 +0000 | [diff] [blame] | 849 |             except BadZipfile: | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 850 |                 return zinfo.filename | 
 | 851 |  | 
 | 852 |     def getinfo(self, name): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 853 |         """Return the instance of ZipInfo given 'name'.""" | 
| Georg Brandl | 4b3ab6f | 2007-07-12 09:59:22 +0000 | [diff] [blame] | 854 |         info = self.NameToInfo.get(name) | 
 | 855 |         if info is None: | 
 | 856 |             raise KeyError( | 
 | 857 |                 'There is no item named %r in the archive' % name) | 
 | 858 |  | 
 | 859 |         return info | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 860 |  | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 861 |     def setpassword(self, pwd): | 
 | 862 |         """Set default password for encrypted files.""" | 
 | 863 |         self.pwd = pwd | 
 | 864 |  | 
 | 865 |     def read(self, name, pwd=None): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 866 |         """Return file bytes (as a string) for name.""" | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 867 |         return self.open(name, "r", pwd).read() | 
 | 868 |  | 
 | 869 |     def open(self, name, mode="r", pwd=None): | 
 | 870 |         """Return file-like object for 'name'.""" | 
 | 871 |         if mode not in ("r", "U", "rU"): | 
 | 872 |             raise RuntimeError, 'open() requires mode "r", "U", or "rU"' | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 873 |         if not self.fp: | 
 | 874 |             raise RuntimeError, \ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 875 |                   "Attempt to read ZIP archive that was already closed" | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 876 |  | 
| Tim Peters | ea5962f | 2007-03-12 18:07:52 +0000 | [diff] [blame] | 877 |         # Only open a new file for instances where we were not | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 878 |         # given a file object in the constructor | 
 | 879 |         if self._filePassed: | 
 | 880 |             zef_file = self.fp | 
 | 881 |         else: | 
 | 882 |             zef_file = open(self.filename, 'rb') | 
 | 883 |  | 
| Georg Brandl | 112aa50 | 2008-05-20 08:25:48 +0000 | [diff] [blame] | 884 |         # Make sure we have an info object | 
 | 885 |         if isinstance(name, ZipInfo): | 
 | 886 |             # 'name' is already an info object | 
 | 887 |             zinfo = name | 
 | 888 |         else: | 
 | 889 |             # Get info object for name | 
 | 890 |             zinfo = self.getinfo(name) | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 891 |  | 
 | 892 |         zef_file.seek(zinfo.header_offset, 0) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 893 |  | 
 | 894 |         # Skip the file header: | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 895 |         fheader = zef_file.read(sizeFileHeader) | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 896 |         if fheader[0:4] != stringFileHeader: | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 897 |             raise BadZipfile, "Bad magic number for file header" | 
 | 898 |  | 
 | 899 |         fheader = struct.unpack(structFileHeader, fheader) | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 900 |         fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 901 |         if fheader[_FH_EXTRA_FIELD_LENGTH]: | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 902 |             zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 903 |  | 
 | 904 |         if fname != zinfo.orig_filename: | 
 | 905 |             raise BadZipfile, \ | 
 | 906 |                       'File name in directory "%s" and header "%s" differ.' % ( | 
 | 907 |                           zinfo.orig_filename, fname) | 
 | 908 |  | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 909 |         # check for encrypted flag & handle password | 
 | 910 |         is_encrypted = zinfo.flag_bits & 0x1 | 
 | 911 |         zd = None | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 912 |         if is_encrypted: | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 913 |             if not pwd: | 
 | 914 |                 pwd = self.pwd | 
 | 915 |             if not pwd: | 
 | 916 |                 raise RuntimeError, "File %s is encrypted, " \ | 
 | 917 |                       "password required for extraction" % name | 
 | 918 |  | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 919 |             zd = _ZipDecrypter(pwd) | 
 | 920 |             # The first 12 bytes in the cypher stream is an encryption header | 
 | 921 |             #  used to strengthen the algorithm. The first 11 bytes are | 
 | 922 |             #  completely random, while the 12th contains the MSB of the CRC, | 
| Gregory P. Smith | 0c63fc2 | 2008-01-20 01:21:03 +0000 | [diff] [blame] | 923 |             #  or the MSB of the file time depending on the header type | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 924 |             #  and is used to check the correctness of the password. | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 925 |             bytes = zef_file.read(12) | 
| Martin v. Löwis | c6d626e | 2007-02-13 09:49:38 +0000 | [diff] [blame] | 926 |             h = map(zd, bytes[0:12]) | 
| Gregory P. Smith | 0c63fc2 | 2008-01-20 01:21:03 +0000 | [diff] [blame] | 927 |             if zinfo.flag_bits & 0x8: | 
 | 928 |                 # compare against the file type from extended local headers | 
 | 929 |                 check_byte = (zinfo._raw_time >> 8) & 0xff | 
 | 930 |             else: | 
 | 931 |                 # compare against the CRC otherwise | 
 | 932 |                 check_byte = (zinfo.CRC >> 24) & 0xff | 
 | 933 |             if ord(h[11]) != check_byte: | 
 | 934 |                 raise RuntimeError("Bad password for file", name) | 
| Martin v. Löwis | 3eb7648 | 2007-03-06 10:41:24 +0000 | [diff] [blame] | 935 |  | 
| Antoine Pitrou | 94c33eb | 2010-01-27 20:59:50 +0000 | [diff] [blame] | 936 |         return  ZipExtFile(zef_file, mode, zinfo, zd) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 937 |  | 
| Georg Brandl | 62416bc | 2008-01-07 18:47:44 +0000 | [diff] [blame] | 938 |     def extract(self, member, path=None, pwd=None): | 
 | 939 |         """Extract a member from the archive to the current working directory, | 
 | 940 |            using its full name. Its file information is extracted as accurately | 
 | 941 |            as possible. `member' may be a filename or a ZipInfo object. You can | 
 | 942 |            specify a different directory using `path'. | 
 | 943 |         """ | 
 | 944 |         if not isinstance(member, ZipInfo): | 
 | 945 |             member = self.getinfo(member) | 
 | 946 |  | 
 | 947 |         if path is None: | 
 | 948 |             path = os.getcwd() | 
 | 949 |  | 
 | 950 |         return self._extract_member(member, path, pwd) | 
 | 951 |  | 
 | 952 |     def extractall(self, path=None, members=None, pwd=None): | 
 | 953 |         """Extract all members from the archive to the current working | 
 | 954 |            directory. `path' specifies a different directory to extract to. | 
 | 955 |            `members' is optional and must be a subset of the list returned | 
 | 956 |            by namelist(). | 
 | 957 |         """ | 
 | 958 |         if members is None: | 
 | 959 |             members = self.namelist() | 
 | 960 |  | 
 | 961 |         for zipinfo in members: | 
 | 962 |             self.extract(zipinfo, path, pwd) | 
 | 963 |  | 
 | 964 |     def _extract_member(self, member, targetpath, pwd): | 
 | 965 |         """Extract the ZipInfo object 'member' to a physical | 
 | 966 |            file on the path targetpath. | 
 | 967 |         """ | 
 | 968 |         # build the destination pathname, replacing | 
 | 969 |         # forward slashes to platform specific separators. | 
| Antoine Pitrou | 97377bf | 2009-05-04 21:17:17 +0000 | [diff] [blame] | 970 |         # Strip trailing path separator, unless it represents the root. | 
 | 971 |         if (targetpath[-1:] in (os.path.sep, os.path.altsep) | 
 | 972 |             and len(os.path.splitdrive(targetpath)[1]) > 1): | 
| Georg Brandl | 62416bc | 2008-01-07 18:47:44 +0000 | [diff] [blame] | 973 |             targetpath = targetpath[:-1] | 
 | 974 |  | 
 | 975 |         # don't include leading "/" from file name if present | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 976 |         if member.filename[0] == '/': | 
| Georg Brandl | 62416bc | 2008-01-07 18:47:44 +0000 | [diff] [blame] | 977 |             targetpath = os.path.join(targetpath, member.filename[1:]) | 
 | 978 |         else: | 
 | 979 |             targetpath = os.path.join(targetpath, member.filename) | 
 | 980 |  | 
 | 981 |         targetpath = os.path.normpath(targetpath) | 
 | 982 |  | 
 | 983 |         # Create all upper directories if necessary. | 
 | 984 |         upperdirs = os.path.dirname(targetpath) | 
 | 985 |         if upperdirs and not os.path.exists(upperdirs): | 
 | 986 |             os.makedirs(upperdirs) | 
 | 987 |  | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 988 |         if member.filename[-1] == '/': | 
| Martin v. Löwis | 0b09c42 | 2009-05-24 19:30:52 +0000 | [diff] [blame] | 989 |             if not os.path.isdir(targetpath): | 
 | 990 |                 os.mkdir(targetpath) | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 991 |             return targetpath | 
 | 992 |  | 
| Georg Brandl | 112aa50 | 2008-05-20 08:25:48 +0000 | [diff] [blame] | 993 |         source = self.open(member, pwd=pwd) | 
| Georg Brandl | 62416bc | 2008-01-07 18:47:44 +0000 | [diff] [blame] | 994 |         target = file(targetpath, "wb") | 
 | 995 |         shutil.copyfileobj(source, target) | 
 | 996 |         source.close() | 
 | 997 |         target.close() | 
 | 998 |  | 
 | 999 |         return targetpath | 
 | 1000 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1001 |     def _writecheck(self, zinfo): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1002 |         """Check for errors before writing a file to the archive.""" | 
| Raymond Hettinger | 54f0222 | 2002-06-01 14:18:47 +0000 | [diff] [blame] | 1003 |         if zinfo.filename in self.NameToInfo: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1004 |             if self.debug:      # Warning for duplicate names | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1005 |                 print "Duplicate name:", zinfo.filename | 
 | 1006 |         if self.mode not in ("w", "a"): | 
 | 1007 |             raise RuntimeError, 'write() requires mode "w" or "a"' | 
 | 1008 |         if not self.fp: | 
 | 1009 |             raise RuntimeError, \ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 1010 |                   "Attempt to write ZIP archive that was already closed" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1011 |         if zinfo.compress_type == ZIP_DEFLATED and not zlib: | 
 | 1012 |             raise RuntimeError, \ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 1013 |                   "Compression requires the (missing) zlib module" | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1014 |         if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): | 
 | 1015 |             raise RuntimeError, \ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 1016 |                   "That compression method is not supported" | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1017 |         if zinfo.file_size > ZIP64_LIMIT: | 
 | 1018 |             if not self._allowZip64: | 
 | 1019 |                 raise LargeZipFile("Filesize would require ZIP64 extensions") | 
 | 1020 |         if zinfo.header_offset > ZIP64_LIMIT: | 
 | 1021 |             if not self._allowZip64: | 
 | 1022 |                 raise LargeZipFile("Zipfile size would require ZIP64 extensions") | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1023 |  | 
 | 1024 |     def write(self, filename, arcname=None, compress_type=None): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1025 |         """Put the bytes from filename into the archive under the name | 
 | 1026 |         arcname.""" | 
| Georg Brandl | 4b3ab6f | 2007-07-12 09:59:22 +0000 | [diff] [blame] | 1027 |         if not self.fp: | 
 | 1028 |             raise RuntimeError( | 
 | 1029 |                   "Attempt to write to ZIP archive that was already closed") | 
 | 1030 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1031 |         st = os.stat(filename) | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 1032 |         isdir = stat.S_ISDIR(st.st_mode) | 
| Raymond Hettinger | 32200ae | 2002-06-01 19:51:15 +0000 | [diff] [blame] | 1033 |         mtime = time.localtime(st.st_mtime) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1034 |         date_time = mtime[0:6] | 
 | 1035 |         # Create ZipInfo instance to store file information | 
 | 1036 |         if arcname is None: | 
| Georg Brandl | 8f7c54e | 2006-02-20 08:40:38 +0000 | [diff] [blame] | 1037 |             arcname = filename | 
 | 1038 |         arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) | 
 | 1039 |         while arcname[0] in (os.sep, os.altsep): | 
 | 1040 |             arcname = arcname[1:] | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 1041 |         if isdir: | 
 | 1042 |             arcname += '/' | 
| Georg Brandl | 8f7c54e | 2006-02-20 08:40:38 +0000 | [diff] [blame] | 1043 |         zinfo = ZipInfo(arcname, date_time) | 
| Andrew M. Kuchling | 5543021 | 2004-07-10 15:40:29 +0000 | [diff] [blame] | 1044 |         zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1045 |         if compress_type is None: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1046 |             zinfo.compress_type = self.compression | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1047 |         else: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1048 |             zinfo.compress_type = compress_type | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1049 |  | 
 | 1050 |         zinfo.file_size = st.st_size | 
| Finn Bock | 03a3bb8 | 2001-09-05 18:40:33 +0000 | [diff] [blame] | 1051 |         zinfo.flag_bits = 0x00 | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1052 |         zinfo.header_offset = self.fp.tell()    # Start of header bytes | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1053 |  | 
 | 1054 |         self._writecheck(zinfo) | 
 | 1055 |         self._didModify = True | 
| Martin v. Löwis | 0dfcfc8 | 2009-01-24 14:00:33 +0000 | [diff] [blame] | 1056 |  | 
 | 1057 |         if isdir: | 
 | 1058 |             zinfo.file_size = 0 | 
 | 1059 |             zinfo.compress_size = 0 | 
 | 1060 |             zinfo.CRC = 0 | 
 | 1061 |             self.filelist.append(zinfo) | 
 | 1062 |             self.NameToInfo[zinfo.filename] = zinfo | 
 | 1063 |             self.fp.write(zinfo.FileHeader()) | 
 | 1064 |             return | 
 | 1065 |  | 
| Benjamin Peterson | b91e8ed | 2009-05-10 02:29:00 +0000 | [diff] [blame] | 1066 |         with open(filename, "rb") as fp: | 
 | 1067 |             # Must overwrite CRC and sizes with correct data later | 
 | 1068 |             zinfo.CRC = CRC = 0 | 
 | 1069 |             zinfo.compress_size = compress_size = 0 | 
 | 1070 |             zinfo.file_size = file_size = 0 | 
 | 1071 |             self.fp.write(zinfo.FileHeader()) | 
 | 1072 |             if zinfo.compress_type == ZIP_DEFLATED: | 
 | 1073 |                 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, | 
 | 1074 |                      zlib.DEFLATED, -15) | 
 | 1075 |             else: | 
 | 1076 |                 cmpr = None | 
 | 1077 |             while 1: | 
 | 1078 |                 buf = fp.read(1024 * 8) | 
 | 1079 |                 if not buf: | 
 | 1080 |                     break | 
 | 1081 |                 file_size = file_size + len(buf) | 
 | 1082 |                 CRC = crc32(buf, CRC) & 0xffffffff | 
 | 1083 |                 if cmpr: | 
 | 1084 |                     buf = cmpr.compress(buf) | 
 | 1085 |                     compress_size = compress_size + len(buf) | 
 | 1086 |                 self.fp.write(buf) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1087 |         if cmpr: | 
 | 1088 |             buf = cmpr.flush() | 
 | 1089 |             compress_size = compress_size + len(buf) | 
 | 1090 |             self.fp.write(buf) | 
 | 1091 |             zinfo.compress_size = compress_size | 
 | 1092 |         else: | 
 | 1093 |             zinfo.compress_size = file_size | 
 | 1094 |         zinfo.CRC = CRC | 
 | 1095 |         zinfo.file_size = file_size | 
| Finn Bock | 03a3bb8 | 2001-09-05 18:40:33 +0000 | [diff] [blame] | 1096 |         # Seek backwards and write CRC and file sizes | 
| Tim Peters | b64bec3 | 2001-09-18 02:26:39 +0000 | [diff] [blame] | 1097 |         position = self.fp.tell()       # Preserve current position in file | 
| Finn Bock | 03a3bb8 | 2001-09-05 18:40:33 +0000 | [diff] [blame] | 1098 |         self.fp.seek(zinfo.header_offset + 14, 0) | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1099 |         self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1100 |               zinfo.file_size)) | 
| Finn Bock | 03a3bb8 | 2001-09-05 18:40:33 +0000 | [diff] [blame] | 1101 |         self.fp.seek(position, 0) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1102 |         self.filelist.append(zinfo) | 
 | 1103 |         self.NameToInfo[zinfo.filename] = zinfo | 
 | 1104 |  | 
| Ronald Oussoren | dd25e86 | 2010-02-07 20:18:02 +0000 | [diff] [blame] | 1105 |     def writestr(self, zinfo_or_arcname, bytes, compress_type=None): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1106 |         """Write a file into the archive.  The contents is the string | 
| Just van Rossum | b083cb3 | 2002-12-12 12:23:32 +0000 | [diff] [blame] | 1107 |         'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or | 
 | 1108 |         the name of the file in the archive.""" | 
 | 1109 |         if not isinstance(zinfo_or_arcname, ZipInfo): | 
 | 1110 |             zinfo = ZipInfo(filename=zinfo_or_arcname, | 
| Raymond Hettinger | 351e1a3 | 2008-01-14 22:58:05 +0000 | [diff] [blame] | 1111 |                             date_time=time.localtime(time.time())[:6]) | 
| Ronald Oussoren | dd25e86 | 2010-02-07 20:18:02 +0000 | [diff] [blame] | 1112 |  | 
| Just van Rossum | b083cb3 | 2002-12-12 12:23:32 +0000 | [diff] [blame] | 1113 |             zinfo.compress_type = self.compression | 
| Antoine Pitrou | 5fdfa3e | 2008-07-25 19:42:26 +0000 | [diff] [blame] | 1114 |             zinfo.external_attr = 0600 << 16 | 
| Just van Rossum | b083cb3 | 2002-12-12 12:23:32 +0000 | [diff] [blame] | 1115 |         else: | 
 | 1116 |             zinfo = zinfo_or_arcname | 
| Georg Brandl | 4b3ab6f | 2007-07-12 09:59:22 +0000 | [diff] [blame] | 1117 |  | 
 | 1118 |         if not self.fp: | 
 | 1119 |             raise RuntimeError( | 
 | 1120 |                   "Attempt to write to ZIP archive that was already closed") | 
 | 1121 |  | 
| Ronald Oussoren | dd25e86 | 2010-02-07 20:18:02 +0000 | [diff] [blame] | 1122 |         if compress_type is not None: | 
 | 1123 |             zinfo.compress_type = compress_type | 
 | 1124 |  | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1125 |         zinfo.file_size = len(bytes)            # Uncompressed size | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1126 |         zinfo.header_offset = self.fp.tell()    # Start of header bytes | 
 | 1127 |         self._writecheck(zinfo) | 
 | 1128 |         self._didModify = True | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1129 |         zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1130 |         if zinfo.compress_type == ZIP_DEFLATED: | 
 | 1131 |             co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, | 
 | 1132 |                  zlib.DEFLATED, -15) | 
 | 1133 |             bytes = co.compress(bytes) + co.flush() | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1134 |             zinfo.compress_size = len(bytes)    # Compressed size | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1135 |         else: | 
 | 1136 |             zinfo.compress_size = zinfo.file_size | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1137 |         zinfo.header_offset = self.fp.tell()    # Start of header bytes | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1138 |         self.fp.write(zinfo.FileHeader()) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1139 |         self.fp.write(bytes) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1140 |         self.fp.flush() | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1141 |         if zinfo.flag_bits & 0x08: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1142 |             # Write CRC and file sizes after the file data | 
| Gregory P. Smith | 2662733 | 2009-06-26 07:50:21 +0000 | [diff] [blame] | 1143 |             self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1144 |                   zinfo.file_size)) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1145 |         self.filelist.append(zinfo) | 
 | 1146 |         self.NameToInfo[zinfo.filename] = zinfo | 
 | 1147 |  | 
 | 1148 |     def __del__(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1149 |         """Call the "close()" method in case the user forgot.""" | 
| Tim Peters | d15f8bb | 2001-11-28 23:16:40 +0000 | [diff] [blame] | 1150 |         self.close() | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1151 |  | 
 | 1152 |     def close(self): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1153 |         """Close the file, and for mode "w" and "a" write the ending | 
 | 1154 |         records.""" | 
| Tim Peters | d15f8bb | 2001-11-28 23:16:40 +0000 | [diff] [blame] | 1155 |         if self.fp is None: | 
 | 1156 |             return | 
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 1157 |  | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1158 |         if self.mode in ("w", "a") and self._didModify: # write ending records | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1159 |             count = 0 | 
 | 1160 |             pos1 = self.fp.tell() | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1161 |             for zinfo in self.filelist:         # write central directory | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1162 |                 count = count + 1 | 
 | 1163 |                 dt = zinfo.date_time | 
 | 1164 |                 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] | 
| Tim Peters | 3caca23 | 2001-12-06 06:23:26 +0000 | [diff] [blame] | 1165 |                 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1166 |                 extra = [] | 
 | 1167 |                 if zinfo.file_size > ZIP64_LIMIT \ | 
 | 1168 |                         or zinfo.compress_size > ZIP64_LIMIT: | 
 | 1169 |                     extra.append(zinfo.file_size) | 
 | 1170 |                     extra.append(zinfo.compress_size) | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1171 |                     file_size = 0xffffffff | 
 | 1172 |                     compress_size = 0xffffffff | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1173 |                 else: | 
 | 1174 |                     file_size = zinfo.file_size | 
 | 1175 |                     compress_size = zinfo.compress_size | 
 | 1176 |  | 
 | 1177 |                 if zinfo.header_offset > ZIP64_LIMIT: | 
 | 1178 |                     extra.append(zinfo.header_offset) | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1179 |                     header_offset = 0xffffffffL | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1180 |                 else: | 
 | 1181 |                     header_offset = zinfo.header_offset | 
 | 1182 |  | 
 | 1183 |                 extra_data = zinfo.extra | 
 | 1184 |                 if extra: | 
 | 1185 |                     # Append a ZIP64 field to the extra's | 
 | 1186 |                     extra_data = struct.pack( | 
| Gregory P. Smith | b89a096 | 2008-03-19 01:46:10 +0000 | [diff] [blame] | 1187 |                             '<HH' + 'Q'*len(extra), | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1188 |                             1, 8*len(extra), *extra) + extra_data | 
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 1189 |  | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1190 |                     extract_version = max(45, zinfo.extract_version) | 
 | 1191 |                     create_version = max(45, zinfo.create_version) | 
 | 1192 |                 else: | 
 | 1193 |                     extract_version = zinfo.extract_version | 
 | 1194 |                     create_version = zinfo.create_version | 
 | 1195 |  | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1196 |                 try: | 
| Martin v. Löwis | 471617d | 2008-05-05 17:16:58 +0000 | [diff] [blame] | 1197 |                     filename, flag_bits = zinfo._encodeFilenameFlags() | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1198 |                     centdir = struct.pack(structCentralDir, | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 1199 |                      stringCentralDir, create_version, | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1200 |                      zinfo.create_system, extract_version, zinfo.reserved, | 
| Martin v. Löwis | 471617d | 2008-05-05 17:16:58 +0000 | [diff] [blame] | 1201 |                      flag_bits, zinfo.compress_type, dostime, dosdate, | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1202 |                      zinfo.CRC, compress_size, file_size, | 
| Martin v. Löwis | 471617d | 2008-05-05 17:16:58 +0000 | [diff] [blame] | 1203 |                      len(filename), len(extra_data), len(zinfo.comment), | 
| Gregory P. Smith | bf02e3b | 2008-03-19 03:14:41 +0000 | [diff] [blame] | 1204 |                      0, zinfo.internal_attr, zinfo.external_attr, | 
 | 1205 |                      header_offset) | 
 | 1206 |                 except DeprecationWarning: | 
 | 1207 |                     print >>sys.stderr, (structCentralDir, | 
 | 1208 |                      stringCentralDir, create_version, | 
 | 1209 |                      zinfo.create_system, extract_version, zinfo.reserved, | 
 | 1210 |                      zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, | 
 | 1211 |                      zinfo.CRC, compress_size, file_size, | 
 | 1212 |                      len(zinfo.filename), len(extra_data), len(zinfo.comment), | 
 | 1213 |                      0, zinfo.internal_attr, zinfo.external_attr, | 
 | 1214 |                      header_offset) | 
 | 1215 |                     raise | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1216 |                 self.fp.write(centdir) | 
| Martin v. Löwis | 471617d | 2008-05-05 17:16:58 +0000 | [diff] [blame] | 1217 |                 self.fp.write(filename) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1218 |                 self.fp.write(extra_data) | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1219 |                 self.fp.write(zinfo.comment) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1220 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1221 |             pos2 = self.fp.tell() | 
 | 1222 |             # Write end-of-zip-archive record | 
| Amaury Forgeot d'Arc | d25f87a | 2009-01-17 16:40:17 +0000 | [diff] [blame] | 1223 |             centDirCount = count | 
 | 1224 |             centDirSize = pos2 - pos1 | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1225 |             centDirOffset = pos1 | 
| Amaury Forgeot d'Arc | d25f87a | 2009-01-17 16:40:17 +0000 | [diff] [blame] | 1226 |             if (centDirCount >= ZIP_FILECOUNT_LIMIT or | 
 | 1227 |                 centDirOffset > ZIP64_LIMIT or | 
 | 1228 |                 centDirSize > ZIP64_LIMIT): | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1229 |                 # Need to write the ZIP64 end-of-archive records | 
 | 1230 |                 zip64endrec = struct.pack( | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 1231 |                         structEndArchive64, stringEndArchive64, | 
| Amaury Forgeot d'Arc | d25f87a | 2009-01-17 16:40:17 +0000 | [diff] [blame] | 1232 |                         44, 45, 45, 0, 0, centDirCount, centDirCount, | 
 | 1233 |                         centDirSize, centDirOffset) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1234 |                 self.fp.write(zip64endrec) | 
 | 1235 |  | 
 | 1236 |                 zip64locrec = struct.pack( | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 1237 |                         structEndArchive64Locator, | 
 | 1238 |                         stringEndArchive64Locator, 0, pos2, 1) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1239 |                 self.fp.write(zip64locrec) | 
| Amaury Forgeot d'Arc | d25f87a | 2009-01-17 16:40:17 +0000 | [diff] [blame] | 1240 |                 centDirCount = min(centDirCount, 0xFFFF) | 
 | 1241 |                 centDirSize = min(centDirSize, 0xFFFFFFFF) | 
 | 1242 |                 centDirOffset = min(centDirOffset, 0xFFFFFFFF) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1243 |  | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1244 |             # check for valid comment length | 
 | 1245 |             if len(self.comment) >= ZIP_MAX_COMMENT: | 
 | 1246 |                 if self.debug > 0: | 
 | 1247 |                     msg = 'Archive comment is too long; truncating to %d bytes' \ | 
 | 1248 |                           % ZIP_MAX_COMMENT | 
 | 1249 |                 self.comment = self.comment[:ZIP_MAX_COMMENT] | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1250 |  | 
| Amaury Forgeot d'Arc | ae6d2b9 | 2008-07-11 21:28:25 +0000 | [diff] [blame] | 1251 |             endrec = struct.pack(structEndArchive, stringEndArchive, | 
| Amaury Forgeot d'Arc | d25f87a | 2009-01-17 16:40:17 +0000 | [diff] [blame] | 1252 |                                  0, 0, centDirCount, centDirCount, | 
 | 1253 |                                  centDirSize, centDirOffset, len(self.comment)) | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1254 |             self.fp.write(endrec) | 
 | 1255 |             self.fp.write(self.comment) | 
| Guido van Rossum | f85af61 | 2001-04-14 16:45:14 +0000 | [diff] [blame] | 1256 |             self.fp.flush() | 
| Martin v. Löwis | 8c43641 | 2008-07-03 12:51:14 +0000 | [diff] [blame] | 1257 |  | 
| Fred Drake | 3d9091e | 2001-03-26 15:49:24 +0000 | [diff] [blame] | 1258 |         if not self._filePassed: | 
 | 1259 |             self.fp.close() | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1260 |         self.fp = None | 
 | 1261 |  | 
 | 1262 |  | 
 | 1263 | class PyZipFile(ZipFile): | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1264 |     """Class to create ZIP archives with Python library files and packages.""" | 
 | 1265 |  | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1266 |     def writepy(self, pathname, basename = ""): | 
 | 1267 |         """Add all files from "pathname" to the ZIP archive. | 
 | 1268 |  | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1269 |         If pathname is a package directory, search the directory and | 
 | 1270 |         all package subdirectories recursively for all *.py and enter | 
 | 1271 |         the modules into the archive.  If pathname is a plain | 
 | 1272 |         directory, listdir *.py and enter all modules.  Else, pathname | 
 | 1273 |         must be a Python *.py file and the module will be put into the | 
 | 1274 |         archive.  Added modules are always module.pyo or module.pyc. | 
 | 1275 |         This method will compile the module.py into module.pyc if | 
 | 1276 |         necessary. | 
 | 1277 |         """ | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1278 |         dir, name = os.path.split(pathname) | 
 | 1279 |         if os.path.isdir(pathname): | 
 | 1280 |             initname = os.path.join(pathname, "__init__.py") | 
 | 1281 |             if os.path.isfile(initname): | 
 | 1282 |                 # This is a package directory, add it | 
 | 1283 |                 if basename: | 
 | 1284 |                     basename = "%s/%s" % (basename, name) | 
 | 1285 |                 else: | 
 | 1286 |                     basename = name | 
 | 1287 |                 if self.debug: | 
 | 1288 |                     print "Adding package in", pathname, "as", basename | 
 | 1289 |                 fname, arcname = self._get_codename(initname[0:-3], basename) | 
 | 1290 |                 if self.debug: | 
 | 1291 |                     print "Adding", arcname | 
 | 1292 |                 self.write(fname, arcname) | 
 | 1293 |                 dirlist = os.listdir(pathname) | 
 | 1294 |                 dirlist.remove("__init__.py") | 
 | 1295 |                 # Add all *.py files and package subdirectories | 
 | 1296 |                 for filename in dirlist: | 
 | 1297 |                     path = os.path.join(pathname, filename) | 
 | 1298 |                     root, ext = os.path.splitext(filename) | 
 | 1299 |                     if os.path.isdir(path): | 
 | 1300 |                         if os.path.isfile(os.path.join(path, "__init__.py")): | 
 | 1301 |                             # This is a package directory, add it | 
 | 1302 |                             self.writepy(path, basename)  # Recursive call | 
 | 1303 |                     elif ext == ".py": | 
 | 1304 |                         fname, arcname = self._get_codename(path[0:-3], | 
 | 1305 |                                          basename) | 
 | 1306 |                         if self.debug: | 
 | 1307 |                             print "Adding", arcname | 
 | 1308 |                         self.write(fname, arcname) | 
 | 1309 |             else: | 
 | 1310 |                 # This is NOT a package directory, add its files at top level | 
 | 1311 |                 if self.debug: | 
 | 1312 |                     print "Adding files from directory", pathname | 
 | 1313 |                 for filename in os.listdir(pathname): | 
 | 1314 |                     path = os.path.join(pathname, filename) | 
 | 1315 |                     root, ext = os.path.splitext(filename) | 
 | 1316 |                     if ext == ".py": | 
 | 1317 |                         fname, arcname = self._get_codename(path[0:-3], | 
 | 1318 |                                          basename) | 
 | 1319 |                         if self.debug: | 
 | 1320 |                             print "Adding", arcname | 
 | 1321 |                         self.write(fname, arcname) | 
 | 1322 |         else: | 
 | 1323 |             if pathname[-3:] != ".py": | 
 | 1324 |                 raise RuntimeError, \ | 
| Fred Drake | 5db246d | 2000-09-29 20:44:48 +0000 | [diff] [blame] | 1325 |                       'Files added with writepy() must end with ".py"' | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1326 |             fname, arcname = self._get_codename(pathname[0:-3], basename) | 
 | 1327 |             if self.debug: | 
 | 1328 |                 print "Adding file", arcname | 
 | 1329 |             self.write(fname, arcname) | 
 | 1330 |  | 
 | 1331 |     def _get_codename(self, pathname, basename): | 
 | 1332 |         """Return (filename, archivename) for the path. | 
 | 1333 |  | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1334 |         Given a module name path, return the correct file path and | 
 | 1335 |         archive name, compiling if necessary.  For example, given | 
 | 1336 |         /python/lib/string, return (/python/lib/string.pyc, string). | 
 | 1337 |         """ | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1338 |         file_py  = pathname + ".py" | 
 | 1339 |         file_pyc = pathname + ".pyc" | 
 | 1340 |         file_pyo = pathname + ".pyo" | 
 | 1341 |         if os.path.isfile(file_pyo) and \ | 
| Raymond Hettinger | 32200ae | 2002-06-01 19:51:15 +0000 | [diff] [blame] | 1342 |                             os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: | 
| Tim Peters | e119006 | 2001-01-15 03:34:38 +0000 | [diff] [blame] | 1343 |             fname = file_pyo    # Use .pyo file | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1344 |         elif not os.path.isfile(file_pyc) or \ | 
| Raymond Hettinger | 32200ae | 2002-06-01 19:51:15 +0000 | [diff] [blame] | 1345 |              os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: | 
| Fred Drake | 484d735 | 2000-10-02 21:14:52 +0000 | [diff] [blame] | 1346 |             import py_compile | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1347 |             if self.debug: | 
 | 1348 |                 print "Compiling", file_py | 
| Martin v. Löwis | 0c6774d | 2003-01-15 11:51:06 +0000 | [diff] [blame] | 1349 |             try: | 
 | 1350 |                 py_compile.compile(file_py, file_pyc, None, True) | 
 | 1351 |             except py_compile.PyCompileError,err: | 
 | 1352 |                 print err.msg | 
| Guido van Rossum | 32abe6f | 2000-03-31 17:30:02 +0000 | [diff] [blame] | 1353 |             fname = file_pyc | 
 | 1354 |         else: | 
 | 1355 |             fname = file_pyc | 
 | 1356 |         archivename = os.path.split(fname)[1] | 
 | 1357 |         if basename: | 
 | 1358 |             archivename = "%s/%s" % (basename, archivename) | 
 | 1359 |         return (fname, archivename) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1360 |  | 
 | 1361 |  | 
 | 1362 | def main(args = None): | 
 | 1363 |     import textwrap | 
 | 1364 |     USAGE=textwrap.dedent("""\ | 
 | 1365 |         Usage: | 
 | 1366 |             zipfile.py -l zipfile.zip        # Show listing of a zipfile | 
 | 1367 |             zipfile.py -t zipfile.zip        # Test if a zipfile is valid | 
 | 1368 |             zipfile.py -e zipfile.zip target # Extract zipfile into target dir | 
 | 1369 |             zipfile.py -c zipfile.zip src ... # Create zipfile from sources | 
 | 1370 |         """) | 
 | 1371 |     if args is None: | 
 | 1372 |         args = sys.argv[1:] | 
 | 1373 |  | 
 | 1374 |     if not args or args[0] not in ('-l', '-c', '-e', '-t'): | 
 | 1375 |         print USAGE | 
 | 1376 |         sys.exit(1) | 
 | 1377 |  | 
 | 1378 |     if args[0] == '-l': | 
 | 1379 |         if len(args) != 2: | 
 | 1380 |             print USAGE | 
 | 1381 |             sys.exit(1) | 
 | 1382 |         zf = ZipFile(args[1], 'r') | 
 | 1383 |         zf.printdir() | 
 | 1384 |         zf.close() | 
 | 1385 |  | 
 | 1386 |     elif args[0] == '-t': | 
 | 1387 |         if len(args) != 2: | 
 | 1388 |             print USAGE | 
 | 1389 |             sys.exit(1) | 
 | 1390 |         zf = ZipFile(args[1], 'r') | 
| Antoine Pitrou | e1436d1 | 2010-08-12 15:25:51 +0000 | [diff] [blame] | 1391 |         badfile = zf.testzip() | 
 | 1392 |         if badfile: | 
 | 1393 |             print("The following enclosed file is corrupted: {!r}".format(badfile)) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1394 |         print "Done testing" | 
 | 1395 |  | 
 | 1396 |     elif args[0] == '-e': | 
 | 1397 |         if len(args) != 3: | 
 | 1398 |             print USAGE | 
 | 1399 |             sys.exit(1) | 
 | 1400 |  | 
 | 1401 |         zf = ZipFile(args[1], 'r') | 
 | 1402 |         out = args[2] | 
 | 1403 |         for path in zf.namelist(): | 
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 1404 |             if path.startswith('./'): | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1405 |                 tgt = os.path.join(out, path[2:]) | 
 | 1406 |             else: | 
 | 1407 |                 tgt = os.path.join(out, path) | 
 | 1408 |  | 
 | 1409 |             tgtdir = os.path.dirname(tgt) | 
 | 1410 |             if not os.path.exists(tgtdir): | 
 | 1411 |                 os.makedirs(tgtdir) | 
| Benjamin Peterson | b91e8ed | 2009-05-10 02:29:00 +0000 | [diff] [blame] | 1412 |             with open(tgt, 'wb') as fp: | 
 | 1413 |                 fp.write(zf.read(path)) | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1414 |         zf.close() | 
 | 1415 |  | 
 | 1416 |     elif args[0] == '-c': | 
 | 1417 |         if len(args) < 3: | 
 | 1418 |             print USAGE | 
 | 1419 |             sys.exit(1) | 
 | 1420 |  | 
 | 1421 |         def addToZip(zf, path, zippath): | 
 | 1422 |             if os.path.isfile(path): | 
 | 1423 |                 zf.write(path, zippath, ZIP_DEFLATED) | 
 | 1424 |             elif os.path.isdir(path): | 
 | 1425 |                 for nm in os.listdir(path): | 
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 1426 |                     addToZip(zf, | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1427 |                             os.path.join(path, nm), os.path.join(zippath, nm)) | 
| Tim Peters | a608bb2 | 2006-06-15 18:06:29 +0000 | [diff] [blame] | 1428 |             # else: ignore | 
| Ronald Oussoren | 143cefb | 2006-06-15 08:14:18 +0000 | [diff] [blame] | 1429 |  | 
 | 1430 |         zf = ZipFile(args[1], 'w', allowZip64=True) | 
 | 1431 |         for src in args[2:]: | 
 | 1432 |             addToZip(zf, src, os.path.basename(src)) | 
 | 1433 |  | 
 | 1434 |         zf.close() | 
 | 1435 |  | 
# Run the command-line interface only when this file is executed as a
# script; importing it as a module does not call main().
if __name__ == "__main__":
    main()