blob: d369be4a4e07a9d3e004374e7536901e8d0820c1 [file] [log] [blame]
The Android Open Source Project52d4c302009-03-03 19:29:09 -08001#!/usr/bin/python2.4
2#
3# Copyright (C) 2008 Google Inc.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
"""Module to compress directories into a series of zip files.

This module will take a directory and compress all its contents, including
child directories, into a series of zip files named N.zip where 'N' ranges from
0 to infinity. The zip files will all be below a certain specified maximum
threshold.

The directory is compressed with a depth first traversal, each directory's
file contents being compressed as it is visited, before the compression of any
child directory's contents. In this way the files within an archive are ordered
and the archives themselves are ordered.

The class also constructs a 'main.py' file intended for use with Google App
Engine with a custom App Engine program not currently distributed with this
code base. The custom App Engine runtime can leverage the index files written
out by this class to more quickly locate which zip file to serve a given URL
from.
"""
36
37__author__ = 'jmatt@google.com (Justin Mattson)'
38
39from optparse import OptionParser
40import os
41import stat
42import sys
43import zipfile
44from zipfile import ZipFile
45import divide_and_compress_constants
46
47
def Main(argv):
  """Parse the command line and compress the requested directory.

  Args:
    argv: the full argument vector, program name first (as in sys.argv)
  """
  parser = CreateOptionsParser()
  # Parse the arguments that were actually handed to us rather than letting
  # optparse silently fall back to sys.argv.
  options, _ = parser.parse_args(argv[1:])
  # Prints the usage text and exits if source or destination is missing.
  VerifyArguments(options, parser)
  zipper = DirectoryZipper(options.destination,
                           options.sourcefiles,
                           ParseSize(options.filesize),
                           options.compress)
  zipper.StartCompress()
57
58
def CreateOptionsParser():
  """Build the command line parser for this script.

  Returns:
    An OptionParser that understands the -s/--sourcefiles, -d/--destination,
    -f/--filesize (default '1M') and -n/--nocompress options. Compression is
    on by default; passing -n stores False in options.compress.
  """
  rtn = OptionParser()
  rtn.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
                 help='The directory containing the files to compress')
  rtn.add_option('-d', '--destination', dest='destination', default=None,
                 help=('Where to put the archive files, this should not be'
                       ' a child of where the source files exist.'))
  rtn.add_option('-f', '--filesize', dest='filesize', default='1M',
                 help=('Maximum size of archive files. A number followed by'
                       ' a magnitude indicator, eg. 1000000B == one million '
                       'BYTES, 500K == five hundred KILOBYTES, 1.2M == one '
                       'point two MEGABYTES. 1M == 1048576 BYTES'))
  rtn.add_option('-n', '--nocompress', action='store_false', dest='compress',
                 default=True,
                 help=('Whether the archive files should be compressed, or '
                       'just a concatenation of the source files'))
  return rtn
76
77
def VerifyArguments(options, parser):
  """Make sure both the source and destination options were supplied.

  If either option is missing (or the options object does not carry the
  attribute at all) the usage text is printed and the process exits with
  status -1.

  Args:
    options: the parsed options object, expected to expose 'sourcefiles' and
      'destination' attributes
    parser: the OptionParser used to print the usage text
  """
  # getattr with a default covers both "attribute missing" and "left at None"
  # without needing to catch AttributeError.
  source = getattr(options, 'sourcefiles', None)
  destination = getattr(options, 'destination', None)
  if source is None or destination is None:
    parser.print_help()
    sys.exit(-1)
86
87
def ParseSize(size_str):
  """Convert a human readable size such as '500K' into a number of bytes.

  The string is a number (integer or decimal) immediately followed by a one
  character magnitude indicator: 'B' for bytes, 'K' for multiples of 1024
  bytes, or 'M' for multiples of 1048576 bytes.

  Args:
    size_str: the size specification, e.g. '1000000B', '500K' or '1.2M'

  Returns:
    The size in bytes as an int (fractional bytes are truncated).

  Raises:
    ValueError: if the string is too short, the magnitude indicator is not
      one of 'B', 'K' or 'M', the numeric part is not a number, or the
      number is negative.
  """
  if len(size_str) < 2:
    raise ValueError(('filesize argument not understood, please include'
                      ' a numeric value and magnitude indicator'))

  multipliers = {'B': 1, 'K': 1024, 'M': 1048576}
  magnitude = size_str[-1]
  if magnitude not in multipliers:
    raise ValueError(('filesize magnitude indicator not valid, must be \'K\','
                      ' \'B\', or \'M\''))

  number_part = size_str[:-1]
  try:
    numeral = float(number_part)
  except ValueError:
    raise ValueError('filesize numeric value not understood: %r' % number_part)
  # A negative limit would make every archive look oversized.
  if numeral < 0:
    raise ValueError('filesize must not be negative: %r' % size_str)
  return int(numeral * multipliers[magnitude])
102
103
class DirectoryZipper(object):
  """Class to compress a directory and all its sub-directories.

  Attributes:
    current_archive: file name (e.g. '0.zip') of the archive being filled
    output_dir: directory the archives and the index file are written to
    base_path: root of the directory tree being compressed
    max_size: maximum size, in bytes, of a single archive file
    compress: True to deflate archive members, False to merely store them
    index_fp: open file object for the generated 'main.py', or None
  """
  current_archive = None
  output_dir = None
  base_path = None
  max_size = None
  compress = None
  index_fp = None

  def __init__(self, output_path, base_dir, archive_size, enable_compression):
    """DirectoryZipper constructor.

    Args:
      output_path: the path to write the archives and index file to
      base_dir: the directory to compress
      archive_size: the maximum size, in bytes, of a single archive file
      enable_compression: whether or not compression should be enabled, if
        disabled, the files will be written into an uncompressed zip
    """
    self.output_dir = output_path
    self.current_archive = '0.zip'
    self.base_path = base_dir
    self.max_size = archive_size
    self.compress = enable_compression

  def _ArchivePath(self):
    """Returns the full path of the archive currently being filled."""
    # os.path.join works whether or not output_dir ends with a separator.
    return os.path.join(self.output_dir, self.current_archive)

  def _CompressionType(self):
    """Returns the zipfile compression constant matching self.compress."""
    if self.compress:
      return zipfile.ZIP_DEFLATED
    return zipfile.ZIP_STORED

  def _AdvanceArchive(self):
    """Points current_archive at the next numbered archive (N.zip -> N+1.zip)."""
    number = int(self.current_archive[:self.current_archive.rfind('.zip')])
    self.current_archive = '%i.zip' % (number + 1)

  def StartCompress(self):
    """Start compress of the directory.

    This will start the compression process and write the archives to the
    specified output directory. It will also produce a 'main.py' file in the
    output directory containing the file preamble, one index record per
    archive, and the file endpiece.
    """
    self.index_fp = open(os.path.join(self.output_dir, 'main.py'), 'w')
    try:
      self.index_fp.write(divide_and_compress_constants.file_preamble)
      # Top-down walk: a directory's own files are handled before any of its
      # children. Sorting dir_names in place makes os.walk descend in
      # alphabetical order.
      for dir_path, dir_names, file_names in os.walk(self.base_path):
        dir_names.sort()
        self.CompressDirectory(1, dir_path, dir_names + file_names)
      self.index_fp.write(divide_and_compress_constants.file_endpiece)
    finally:
      self.index_fp.close()

  def RemoveLastFile(self, archive_path=None):
    """Removes the last item in the archive.

    This removes the last item in the archive by reading the items out of the
    archive, adding them to a new archive, deleting the old archive, and
    moving the new archive to the location of the old archive.

    Args:
      archive_path: Path to the archive to modify. This archive should not be
        open elsewhere, since it will need to be deleted. Defaults to the
        current archive.
    """
    if archive_path is None:
      archive_path = self._ArchivePath()

    # Move the old file and create a new one at its old location
    root, extension = os.path.splitext(archive_path)
    old_archive = ''.join([root, '-old', extension])
    os.rename(archive_path, old_archive)

    old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
    try:
      new_fp = self.OpenZipFileAtPath(archive_path,
                                      mode='w',
                                      compress=self._CompressionType())
      try:
        # Copy every member except the last one. Passing the ZipInfo keeps
        # the member's original metadata (timestamp, attributes).
        for member in old_fp.infolist()[:-1]:
          new_fp.writestr(member, old_fp.read(member.filename))
      finally:
        new_fp.close()
    finally:
      old_fp.close()
    os.unlink(old_archive)

  def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
    """Open the zip file at path.

    This method is mainly for testing purposes, eg dependency injection.

    Args:
      path: the path of the zip file to open
      mode: 'r', 'w' or 'a'; when None the file is appended to if it already
        exists and created otherwise
      compress: the zipfile compression constant, ignored when reading

    Returns:
      The opened ZipFile object.
    """
    if mode is None:
      if os.path.exists(path):
        mode = 'a'
      else:
        mode = 'w'

    if mode == 'r':
      return ZipFile(path, mode)
    return ZipFile(path, mode, compress)

  def CompressDirectory(self, irrelevant, dir_path, dir_contents):
    """Method to compress the given directory.

    This method compresses the directory 'dir_path'. It will add to an existing
    zip file that still has space and create new ones as necessary to keep zip
    file sizes under the maximum specified size. This also writes out the
    mapping of files to archives to the self.index_fp file descriptor

    Args:
      irrelevant: a placeholder argument kept for compatibility with the
        os.path.walk callback signature, this is not used by this method
      dir_path: the path to the directory to compress
      dir_contents: a list of directory contents to be compressed; it is
        sorted in place so files are added in alphabetical order
    """
    dir_contents.sort()
    compress_bit = self._CompressionType()

    for filename in dir_contents:
      target_file = os.path.join(dir_path, filename)
      # Sub-directories are handled by their own call; skip non-files.
      while os.path.isfile(target_file):
        self.AddFileToArchive(target_file, compress_bit)

        if self.ArchiveIsValid():
          # Only actually writes a record if this was the archive's first file
          self.WriteIndexRecord()
          break

        # Adding the file made the archive too large.
        # IF fixing fails, the file is too large on its own, skip it
        # ELSE the current archive filled normally, start a new one and try
        # adding the file again
        if not self.FixArchive('SIZE'):
          break
        self._AdvanceArchive()

  def WriteIndexRecord(self):
    """Write an index record to the index file.

    Only write an index record if this is the first file to go into archive

    Returns:
      True if an archive record is written, False if it isn't
    """
    archive = self.OpenZipFileAtPath(self._ArchivePath(), 'r')
    try:
      archive_index = archive.infolist()
    finally:
      archive.close()

    if len(archive_index) != 1:
      return False
    # NOTE(review): the member name is interpolated unescaped into what
    # appears to be generated Python source; a name containing a quote or
    # backslash would presumably corrupt it - confirm against the consumer.
    self.index_fp.write(
        '[\'%s\', \'%s\'],\n' % (self.current_archive,
                                 archive_index[0].filename))
    return True

  def FixArchive(self, problem):
    """Make the archive compliant.

    Args:
      problem: the reason the archive is invalid; only 'SIZE' is handled

    Returns:
      Whether the file(s) removed to fix the archive could conceivably be
      in an archive, but for some reason can't be added to this one. None is
      returned for a problem other than 'SIZE'.
    """
    if problem != 'SIZE':
      return None

    archive_path = self._ArchivePath()
    archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
    try:
      members = archive_obj.infolist()
    finally:
      # Close before touching the file on disk: RemoveLastFile renames and
      # deletes it, which requires that it is not open elsewhere.
      archive_obj.close()

    # IF there is a single file, that means its too large to compress,
    # delete the created archive
    # ELSE do normal finalization
    if len(members) == 1:
      print('WARNING: %s%s is too large to store.' % (
          self.base_path, members[0].filename))
      os.unlink(archive_path)
      return False

    self.RemoveLastFile(archive_path)
    print('Final archive size for %s is %i' % (
        self.current_archive, os.stat(archive_path)[stat.ST_SIZE]))
    return True

  def AddFileToArchive(self, filepath, compress_bit):
    """Add the file at filepath to the current archive.

    The file is stored under its path relative to base_path. A warning is
    printed for files larger than 1048576 bytes.

    Args:
      filepath: the path of the file to add
      compress_bit: the zipfile compression constant to use when adding

    Returns:
      True if the file could be added (typically because this is a file) or
      False if it couldn't be added (typically because its a directory)
    """
    if not os.path.isfile(filepath):
      return False

    if os.stat(filepath)[stat.ST_SIZE] > 1048576:
      print('Warning: %s is potentially too large to serve on GAE' % filepath)
    archive = self.OpenZipFileAtPath(self._ArchivePath(),
                                     compress=compress_bit)
    try:
      # add the file to the archive, named relative to the base directory
      archive.write(filepath, filepath[len(self.base_path):])
    finally:
      archive.close()
    return True

  def ArchiveIsValid(self):
    """Check whether the archive is valid.

    Currently this only checks whether the archive is under the required size.
    The thought is that eventually this will do additional validation

    Returns:
      True if the archive is valid, False if its not
    """
    return os.stat(self._ArchivePath())[stat.ST_SIZE] <= self.max_size
350
# Run the compressor only when executed as a script, not when imported.
if __name__ == '__main__':
  Main(sys.argv)