blob: 2bcb0ab679a28a722a7c028d84617ef6dfd301a9 [file] [log] [blame]
The Android Open Source Project52d4c302009-03-03 19:29:09 -08001#!/usr/bin/python2.4
2#
3# Copyright (C) 2008 Google Inc.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
"""Module to compress directories into a series of zip files.
19
This module will take a directory and compress all its contents, including
child directories, into a series of zip files named N.zip where 'N' ranges from
0 to infinity. The zip files will all be below a certain specified maximum
threshold.
24
The directory is compressed with a depth first traversal, each directory's
file contents being compressed as it is visited, before the compression of any
child directory's contents. In this way the files within an archive are ordered
and the archives themselves are ordered.
29
The class also constructs a 'main.py' file intended for use with Google App
Engine with a custom App Engine program not currently distributed with this
code base. The custom App Engine runtime can leverage the index files written
out by this class to more quickly locate which zip file to serve a given URL
from.
"""
36
37__author__ = 'jmatt@google.com (Justin Mattson)'
38
The Android Open Source Project6ffae012009-03-18 17:39:43 -070039import optparse
The Android Open Source Project52d4c302009-03-03 19:29:09 -080040import os
41import stat
42import sys
43import zipfile
The Android Open Source Project52d4c302009-03-03 19:29:09 -080044import divide_and_compress_constants
45
46
def CreateOptionsParser():
  """Builds the command line parser used by this script.

  Returns:
    An optparse.OptionParser that understands the -s/--sourcefiles,
    -d/--destination, -f/--filesize and -n/--nocompress options.
  """
  source_help = 'The directory containing the files to compress'
  destination_help = ('Where to put the archive files, this should not be a '
                      'child of where the source files exist.')
  filesize_help = ''.join([
      'Maximum size of archive files. A number followed by a magnitude ',
      'indicator either "B", "K", "M", or "G". Examples:\n',
      ' 1000000B == one million BYTES\n',
      ' 1.2M == one point two MEGABYTES\n',
      ' 1M == 1048576 BYTES',
  ])
  nocompress_help = ('Whether the archive files should be compressed, or just '
                     'a concatenation of the source files')

  parser = optparse.OptionParser()
  parser.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
                    help=source_help)
  parser.add_option('-d', '--destination', dest='destination', default=None,
                    help=destination_help)
  parser.add_option('-f', '--filesize', dest='filesize', default='1M',
                    help=filesize_help)
  # Compression is on unless -n is given, hence store_false.
  parser.add_option('-n', '--nocompress', dest='compress',
                    action='store_false', default=True,
                    help=nocompress_help)
  return parser
70
71
def VerifyArguments(options, parser):
  """Checks that the required command line options were supplied.

  If either the source or the destination option is absent (missing from the
  options object or set to None), the parser's help text is printed and the
  process exits with status -1.

  Args:
    options: The command line options passed.
    parser: The parser object used to parse the command string.
  """
  for required_option in ('sourcefiles', 'destination'):
    # getattr's default covers an options object lacking the attribute.
    if getattr(options, required_option, None) is None:
      parser.print_help()
      sys.exit(-1)
86
87
def ParseSize(size_str):
  """Parses a file size argument such as '1M' into a number of bytes.

  Args:
    size_str: A string, a number followed by a one-letter magnitude
      indicator: "B" (bytes), "K" (x 1024), "M" (x 1048576) or
      "G" (x 1073741824). The indicator is matched case-insensitively and
      surrounding whitespace is ignored.

  Returns:
    The file size in bytes as an integer; any fractional byte is truncated.

  Raises:
    ValueError: If the numeric or magnitude portion of the argument is
      invalid, or if the resulting size is negative or not finite.
  """
  multipliers = {'B': 1, 'K': 1024, 'M': 1048576, 'G': 1073741824}

  size_str = size_str.strip()
  if len(size_str) < 2:
    raise ValueError(('filesize argument not understood, please include'
                      ' a numeric value and magnitude indicator'))

  magnitude = size_str[-1].upper()
  if magnitude not in multipliers:
    raise ValueError(('filesize magnitude indicator not valid, must be "B",'
                      '"K","M", or "G"'))

  numeral_str = size_str[:-1]
  try:
    numeral = float(numeral_str)
    # int() rejects nan (ValueError) and infinity (OverflowError); both are
    # reported as the documented ValueError below.
    size = int(numeral * multipliers[magnitude])
  except (ValueError, OverflowError):
    raise ValueError('filesize numeric value not valid: %r' % numeral_str)

  # A negative maximum archive size can never be satisfied.
  if numeral < 0:
    raise ValueError('filesize must not be negative: %r' % numeral_str)
  return size
116
117
class DirectoryZipper(object):
  """Compresses a directory and all its sub-directories into zip archives.

  Archives are named '0.zip', '1.zip', ... and are written to the output
  directory together with a 'main.py' index file that records the first
  member stored in each archive.
  """

  # Files bigger than this many bytes trigger a warning when they are added;
  # the warning text refers to serving the file from Google App Engine.
  LARGE_FILE_WARNING_BYTES = 1048576

  def __init__(self, output_path, base_dir, archive_size, enable_compression):
    """DirectoryZipper constructor.

    Args:
      output_path: A string, the path to write the archives and index file to.
      base_dir: A string, the directory to compress.
      archive_size: A number, the maximum size, in bytes, of a single
        archive file.
      enable_compression: A boolean, whether or not compression should be
        enabled, if disabled, the files will be written into an uncompressed
        zip.
    """
    self.output_dir = output_path
    self.current_archive = '0.zip'
    self.base_path = base_dir
    self.max_size = archive_size
    self.compress = enable_compression

    # Set index_fp to None, because we don't know what it will be yet.
    self.index_fp = None

  def StartCompress(self):
    """Start compress of the directory.

    This will start the compression process and write the archives to the
    specified output directory. It will also produce a 'main.py' file in the
    output directory that maps from archive to the first file it contains.
    """
    self.index_fp = open(os.path.join(self.output_dir, 'main.py'), 'w')
    try:
      self.index_fp.write(divide_and_compress_constants.file_preamble)
      for dir_path, dir_names, file_names in os.walk(self.base_path):
        # os.walk descends into dir_names in list order, so sorting in place
        # makes the traversal (and therefore the archives) alphabetical.
        dir_names.sort()
        self.CompressDirectory(1, dir_path, dir_names + file_names)
      self.index_fp.write(divide_and_compress_constants.file_endpiece)
    finally:
      # Close the index even if compression fails part way through.
      self.index_fp.close()

  def RemoveLastFile(self, archive_path=None):
    """Removes the last item in the archive.

    This removes the last item in the archive by reading the items out of the
    archive, adding them to a new archive, deleting the old archive, and
    moving the new archive to the location of the old archive.

    Args:
      archive_path: Path to the archive to modify. This archive should not be
        open elsewhere, since it will need to be deleted. Defaults to the
        current archive in the output directory.

    Returns:
      None. The rewritten archive is closed before this method returns.
    """
    if archive_path is None:
      archive_path = os.path.join(self.output_dir, self.current_archive)

    # Move the old file and create a new one at its old location.
    root, ext = os.path.splitext(archive_path)
    old_archive = ''.join([root, '-old', ext])
    os.rename(archive_path, old_archive)

    # By default, store uncompressed.
    compress_bit = zipfile.ZIP_STORED
    if self.compress:
      compress_bit = zipfile.ZIP_DEFLATED

    old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
    try:
      new_fp = self.OpenZipFileAtPath(archive_path,
                                      mode='w',
                                      compress=compress_bit)
      try:
        # Copy every member except the last one into the new archive.
        for zip_member in old_fp.infolist()[:-1]:
          new_fp.writestr(zip_member, old_fp.read(zip_member.filename))
      finally:
        new_fp.close()
    finally:
      old_fp.close()

    os.unlink(old_archive)

  def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
    """Opens the zip file at path; a seam for dependency injection in tests.

    Args:
      path: A string, the path of the zip file to open.
      mode: The zipfile mode to use. When None, 'a' is used if path already
        exists and 'w' otherwise.
      compress: The zipfile compression constant; ignored when mode is 'r'.

    Returns:
      A zipfile.ZipFile object opened on path.
    """
    if mode is None:
      if os.path.exists(path):
        mode = 'a'
      else:
        mode = 'w'

    if mode == 'r':
      return zipfile.ZipFile(path, mode)
    else:
      return zipfile.ZipFile(path, mode, compress)

  def CompressDirectory(self, unused_id, dir_path, dir_contents):
    """Method to compress the given directory.

    This method compresses the directory 'dir_path'. It will add to an existing
    zip file that still has space and create new ones as necessary to keep zip
    file sizes under the maximum specified size. This also writes out the
    mapping of archives to files to the self.index_fp file object.

    Args:
      unused_id: An identifier kept for compatibility with the os.path.walk
        callback signature; this is not used by this method.
      dir_path: A string, the path to the directory to compress.
      dir_contents: A list of the names found in dir_path. It is sorted in
        place so that files are archived in alphabetical order.
    """
    dir_contents.sort()

    compress_bit = zipfile.ZIP_DEFLATED
    if not self.compress:
      compress_bit = zipfile.ZIP_STORED

    for filename in dir_contents:
      target_file = os.path.join(dir_path, filename)

      # AddFileToArchive refuses directories and symlinks by returning False.
      # Nothing was written in that case, so the size check and index record
      # must be skipped as well.
      while self.AddFileToArchive(target_file, compress_bit):
        if self.ArchiveIsValid():
          # Write an index record if necessary.
          self.WriteIndexRecord()
          break

        # The archive became too large. If fixing fails, the file is too
        # large for any archive, so skip it. Otherwise the current archive
        # filled normally: start a new one and try adding the file again.
        if not self.FixArchive('SIZE'):
          break
        self._StartNextArchive()

  def _StartNextArchive(self):
    """Points self.current_archive at the next archive name in sequence."""
    archive_number = int(os.path.splitext(self.current_archive)[0])
    self.current_archive = '%i.zip' % (archive_number + 1)

  def WriteIndexRecord(self):
    """Write an index record to the index file.

    Only write an index record if this is the first file to go into archive

    Returns:
      True if an archive record is written, False if it isn't.
    """
    archive = self.OpenZipFileAtPath(
        os.path.join(self.output_dir, self.current_archive), 'r')
    try:
      archive_index = archive.infolist()
    finally:
      archive.close()

    if len(archive_index) != 1:
      return False
    self.index_fp.write(
        '[\'%s\', \'%s\'],\n' % (self.current_archive,
                                 archive_index[0].filename))
    return True

  def FixArchive(self, problem):
    """Make the archive compliant.

    Args:
      problem: A string naming the reason the archive is invalid. Only 'SIZE'
        is handled.

    Returns:
      True if the last file was removed from the archive and could still be
      added to another archive, False if the archive held a single file that
      is too large to store (the archive is deleted), or None if the problem
      is not recognized.
    """
    if problem != 'SIZE':
      return None

    archive_path = os.path.join(self.output_dir, self.current_archive)
    archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
    try:
      archive_members = archive_obj.infolist()
    finally:
      archive_obj.close()

    # IF there is a single file, that means its too large to compress,
    # delete the created archive
    # ELSE do normal finalization.
    if len(archive_members) == 1:
      # Member names are stored without a leading separator, so join them
      # back onto the base path rather than concatenating.
      print('WARNING: %s is too large to store.' % os.path.join(
          self.base_path, archive_members[0].filename))
      os.unlink(archive_path)
      return False

    self.RemoveLastFile(archive_path)
    print('Final archive size for %s is %i' % (
        self.current_archive, os.path.getsize(archive_path)))
    return True

  def AddFileToArchive(self, filepath, compress_bit):
    """Add the file at filepath to the current archive.

    Args:
      filepath: A string, the path of the file to add.
      compress_bit: The zipfile compression constant (zipfile.ZIP_STORED or
        zipfile.ZIP_DEFLATED) to open the archive with.

    Returns:
      True if the file could be added (typically because this is a file) or
      False if it couldn't be added (typically because its a directory or a
      symbolic link).
    """
    if not os.path.isfile(filepath) or os.path.islink(filepath):
      return False

    if os.path.getsize(filepath) > self.LARGE_FILE_WARNING_BYTES:
      print('Warning: %s is potentially too large to serve on GAE' % filepath)

    curr_archive_path = os.path.join(self.output_dir, self.current_archive)
    archive = self.OpenZipFileAtPath(curr_archive_path,
                                     compress=compress_bit)
    try:
      # Store the file under its path relative to the base directory.
      archive.write(filepath, filepath[len(self.base_path):])
    finally:
      archive.close()
    return True

  def ArchiveIsValid(self):
    """Check whether the archive is valid.

    Currently this only checks whether the archive is under the required size.
    The thought is that eventually this will do additional validation

    Returns:
      True if the archive is valid, False if its not.
    """
    archive_path = os.path.join(self.output_dir, self.current_archive)
    return os.path.getsize(archive_path) <= self.max_size
352
353
def main(argv):
  """Parses the command line and compresses the requested directory.

  Args:
    argv: The full argument list, including the program name at index 0.
  """
  parser = CreateOptionsParser()
  (options, unused_args) = parser.parse_args(args=argv[1:])
  VerifyArguments(options, parser)

  try:
    max_archive_size = ParseSize(options.filesize)
  except ValueError:
    # Report a bad --filesize as a usage error instead of a traceback.
    # sys.exc_info() keeps this parseable by both old and new interpreters.
    parser.error(str(sys.exc_info()[1]))

  zipper = DirectoryZipper(options.destination,
                           options.sourcefiles,
                           max_archive_size,
                           options.compress)
  zipper.StartCompress()


if __name__ == '__main__':
  main(sys.argv)