Move some scripts that were in development/tools/scripts back into this project
They were moved into sdk/scripts when sdk was split from development.
Change-Id: I8404ae5fdeb9060adb76357f29b42c4c8e2054ee
diff --git a/scripts/divide_and_compress.py b/scripts/divide_and_compress.py
new file mode 100755
index 0000000..2bcb0ab
--- /dev/null
+++ b/scripts/divide_and_compress.py
@@ -0,0 +1,366 @@
+#!/usr/bin/python2.4
+#
+# Copyright (C) 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Module to compress directories into a series of zip files.
+
+This module will take a directory and compress all its contents, including
+child directories into a series of zip files named N.zip where 'N' ranges from
+0 to infinity. The zip files will all be below a certain specified maximum
+threshold.
+
+The directory is compressed with a depth first traversal, each directory's
+file contents being compressed as it is visited, before the compression of any
+child directory's contents. In this way the files within an archive are ordered
+and the archives themselves are ordered.
+
+The class also constructs a 'main.py' file intended for use with Google App
+Engine with a custom App Engine program not currently distributed with this
+code base. The custom App Engine runtime can leverage the index files written
+out by this class to more quickly locate which zip file to serve a given URL
+from.
+"""
+
+__author__ = 'jmatt@google.com (Justin Mattson)'
+
+import optparse
+import os
+import stat
+import sys
+import zipfile
+import divide_and_compress_constants
+
+
+def CreateOptionsParser():
+  """Builds the optparse parser used for this script's command line.
+
+  Returns:
+    An optparse.OptionParser that knows -s, -d, -f and -n.
+  """
+  size_help = ('Maximum size of archive files. A number followed by '
+               'a magnitude indicator either "B", "K", "M", or "G". '
+               'Examples:\n 1000000B == one million BYTES\n'
+               ' 1.2M == one point two MEGABYTES\n 1M == 1048576 BYTES')
+  parser = optparse.OptionParser()
+  parser.add_option('-s', '--sourcefiles', dest='sourcefiles', default=None,
+                    help='The directory containing the files to compress')
+  parser.add_option('-d', '--destination', dest='destination', default=None,
+                    help=('Where to put the archive files, this should not be'
+                          ' a child of where the source files exist.'))
+  parser.add_option('-f', '--filesize', dest='filesize', default='1M',
+                    help=size_help)
+  parser.add_option('-n', '--nocompress', action='store_false',
+                    dest='compress', default=True,
+                    help=('Whether the archive files should be compressed, or '
+                          'just a concatenation of the source files'))
+  return parser
+
+
+def VerifyArguments(options, parser):
+  """Exits with usage help unless both required options were supplied.
+
+  Args:
+    options: The parsed command line options object.
+    parser: The parser that produced options; used to print the help text.
+  """
+  # A missing attribute is treated exactly like an option left unset.
+  source = getattr(options, 'sourcefiles', None)
+  destination = getattr(options, 'destination', None)
+  if source is not None and destination is not None:
+    return
+  parser.print_help()
+  sys.exit(-1)
+
+
+def ParseSize(size_str):
+  """Parse the file size argument from a string to a number of bytes.
+
+  Args:
+    size_str: A number followed by a magnitude of "B", "K", "M" or "G".
+
+  Returns:
+    The file size in bytes, truncated to an integer.
+
+  Raises:
+    ValueError: If the magnitude indicator is unknown, or the numeric part
+      is missing, not a finite number, or negative.
+  """
+  multipliers = {'B': 1, 'K': 1024, 'M': 1048576, 'G': 1073741824}
+  if len(size_str) < 2:
+    raise ValueError(('filesize argument not understood, please include'
+                      ' a numeric value and magnitude indicator'))
+  if size_str[-1] not in multipliers:
+    raise ValueError(('filesize magnitude indicator not valid, must be "B",'
+                      '"K","M", or "G"'))
+  try:
+    size = int(float(size_str[:-1]) * multipliers[size_str[-1]])
+  except (ValueError, OverflowError):
+    # int() raises OverflowError for infinity; report it like any bad number.
+    raise ValueError('filesize numeral not valid: %r' % size_str[:-1])
+  if size < 0:
+    raise ValueError('filesize must not be negative: %r' % size_str)
+  return size
+
+
+class DirectoryZipper(object):
+  """Class to compress a directory and all its sub-directories."""
+
+  def __init__(self, output_path, base_dir, archive_size, enable_compression):
+    """DirectoryZipper constructor.
+
+    Args:
+      output_path: A string, the path to write the archives and index file to.
+      base_dir: A string, the directory to compress.
+      archive_size: A number, the maximum size, in bytes, of a single
+        archive file.
+      enable_compression: A boolean, whether or not compression should be
+        enabled, if disabled, the files will be written into an uncompressed
+        zip.
+    """
+    self.output_dir = output_path
+    self.current_archive = '0.zip'
+    self.base_path = base_dir
+    self.max_size = archive_size
+    self.compress = enable_compression
+
+    # Set index_fp to None, because we don't know what it will be yet.
+    self.index_fp = None
+
+  def StartCompress(self):
+    """Compresses base_path into archives and writes the 'main.py' index.
+
+    Both are written to the output directory; the index is always closed.
+    """
+    self.index_fp = open(os.path.join(self.output_dir, 'main.py'), 'w')
+    try:
+      self.index_fp.write(divide_and_compress_constants.file_preamble)
+      os.path.walk(self.base_path, self.CompressDirectory, 1)
+      self.index_fp.write(divide_and_compress_constants.file_endpiece)
+    finally:
+      self.index_fp.close()
+
+  def RemoveLastFile(self, archive_path=None):
+    """Removes the last item in the archive.
+
+    This rewrites the archive: the original is renamed aside, every member but
+    the last is copied into a new archive at the original path, and the
+    renamed original is then deleted.
+
+    Args:
+      archive_path: Path to the archive to modify, defaulting to the current
+        archive. It should not be open elsewhere, since it is renamed and
+        then deleted.
+    """
+    if archive_path is None:
+      archive_path = os.path.join(self.output_dir, self.current_archive)
+
+    # Move the old file and create a new one at its old location.
+    root, ext = os.path.splitext(archive_path)
+    old_archive = ''.join([root, '-old', ext])
+    os.rename(archive_path, old_archive)
+
+    # By default, store uncompressed.
+    compress_bit = zipfile.ZIP_STORED
+    if self.compress:
+      compress_bit = zipfile.ZIP_DEFLATED
+
+    old_fp = self.OpenZipFileAtPath(old_archive, mode='r')
+    try:
+      new_fp = self.OpenZipFileAtPath(archive_path, mode='w',
+                                      compress=compress_bit)
+      try:
+        # Copy every member except the last one into the new archive.
+        for zip_member in old_fp.infolist()[:-1]:
+          new_fp.writestr(zip_member, old_fp.read(zip_member.filename))
+      finally:
+        new_fp.close()
+    finally:
+      old_fp.close()
+    # Only reached when the copy succeeded; drop the renamed original.
+    os.unlink(old_archive)
+
+  def OpenZipFileAtPath(self, path, mode=None, compress=zipfile.ZIP_DEFLATED):
+    """Opens a zip file; a separate method so tests can inject a fake."""
+    # With no mode given, append to an existing file or create a new one.
+    if mode is None:
+      if os.path.exists(path):
+        mode = 'a'
+      else:
+        mode = 'w'
+
+    if mode == 'r':
+      return zipfile.ZipFile(path, mode)
+    return zipfile.ZipFile(path, mode, compress)
+
+  def CompressDirectory(self, unused_id, dir_path, dir_contents):
+    """Method to compress the given directory.
+
+    This method compresses the files directly inside 'dir_path'. It will add
+    to an existing zip file that still has space and create new ones as
+    necessary to keep zip file sizes under the maximum specified size. This
+    also writes out the mapping of files to archives to the self.index_fp
+    file descriptor.
+
+    Args:
+      unused_id: A numeric identifier passed by the os.path.walk method, this
+        is not used by this method.
+      dir_path: A string, the path to the directory to compress.
+      dir_contents: A list of the names of the entries in dir_path; it is
+        sorted in place.
+    """
+    # Sort so that files are archived in alphabetical order.
+    dir_contents.sort()
+    compress_bit = zipfile.ZIP_DEFLATED
+    if not self.compress:
+      compress_bit = zipfile.ZIP_STORED
+
+    # Zip all files in this directory, adding to existing archives and creating
+    # as necessary.
+    for filename in dir_contents:
+      target_file = os.path.join(dir_path, filename)
+      if not os.path.isfile(target_file):
+        continue
+
+      # Loop so that a file which overflows a full archive is retried against
+      # the next, empty archive.
+      while True:
+        # Nothing was written (e.g. a symlink), so there is no archive size
+        # to check and no index record to add.
+        if not self.AddFileToArchive(target_file, compress_bit):
+          break
+
+        if self.ArchiveIsValid():
+          # Write an index record if necessary.
+          self.WriteIndexRecord()
+          break
+
+        # IF fixing fails, the last added file was too large, skip it
+        # ELSE the current archive filled normally, make a new one and try
+        # adding the file again.
+        if not self.FixArchive('SIZE'):
+          break
+
+        self.current_archive = '%i.zip' % (
+            int(self.current_archive[
+                0:self.current_archive.rfind('.zip')]) + 1)
+
+  def WriteIndexRecord(self):
+    """Write an index record to the index file.
+
+    Only write an index record if this is the first file to go into the archive.
+
+    Returns:
+      True if an archive record is written, False if it isn't.
+    """
+    archive = self.OpenZipFileAtPath(
+        os.path.join(self.output_dir, self.current_archive), 'r')
+    try:
+      archive_index = archive.infolist()
+    finally:
+      archive.close()
+    if len(archive_index) != 1:
+      return False
+    self.index_fp.write(
+        '[\'%s\', \'%s\'],\n' % (self.current_archive,
+                                 archive_index[0].filename))
+    return True
+
+  def FixArchive(self, problem):
+    """Make the archive compliant.
+
+    Args:
+      problem: A string naming why the archive is invalid. Only 'SIZE' is
+        handled; any other value leaves the archive untouched.
+
+    Returns:
+      True if the last file was removed and could still fit in another
+      archive, False if the archive's only file is too large for any archive
+      (the archive is then deleted), or None if problem is not 'SIZE'.
+    """
+    archive_path = os.path.join(self.output_dir, self.current_archive)
+    if problem != 'SIZE':
+      return None
+
+    archive_obj = self.OpenZipFileAtPath(archive_path, mode='r')
+    try:
+      member_names = [info.filename for info in archive_obj.infolist()]
+    finally:
+      archive_obj.close()
+
+    # IF there is a single file, that means it is too large to compress,
+    # delete the created archive
+    # ELSE do normal finalization.
+    if len(member_names) == 1:
+      print('WARNING: %s%s is too large to store.' % (
+          self.base_path, member_names[0]))
+      os.unlink(archive_path)
+      return False
+    self.RemoveLastFile(archive_path)
+    print('Final archive size for %s is %i' % (
+        self.current_archive, os.path.getsize(archive_path)))
+    return True
+
+  def AddFileToArchive(self, filepath, compress_bit):
+    """Add the file at filepath to the current archive.
+
+    Args:
+      filepath: A string, the path of the file to add.
+      compress_bit: The zipfile compression constant (zipfile.ZIP_STORED or
+        zipfile.ZIP_DEFLATED) to open the archive with.
+
+    Returns:
+      True if the file was added, or False if it was skipped because it is
+      not a regular file (a directory or a symbolic link).
+    """
+    if not os.path.isfile(filepath) or os.path.islink(filepath):
+      return False
+    if os.path.getsize(filepath) > 1048576:
+      print('Warning: %s is potentially too large to serve on GAE' % filepath)
+    curr_archive_path = os.path.join(self.output_dir, self.current_archive)
+    archive = self.OpenZipFileAtPath(curr_archive_path, compress=compress_bit)
+    try:
+      # Store the file under its path relative to the base directory.
+      archive.write(filepath, filepath[len(self.base_path):])
+    finally:
+      archive.close()
+    return True
+
+  def ArchiveIsValid(self):
+    """Check whether the archive is valid.
+
+    Currently this only checks whether the archive is under the required size.
+    The thought is that eventually this will do additional validation.
+
+    Returns:
+      True if the archive is valid, False if it's not.
+    """
+    archive_path = os.path.join(self.output_dir, self.current_archive)
+    return os.path.getsize(archive_path) <= self.max_size
+
+
+def main(argv):
+  """Parses argv, validates the options and compresses the source tree."""
+  cmdline = CreateOptionsParser()
+  parsed, unused_extra = cmdline.parse_args(args=argv[1:])
+  VerifyArguments(parsed, cmdline)
+  max_bytes = ParseSize(parsed.filesize)
+  zipper = DirectoryZipper(parsed.destination, parsed.sourcefiles, max_bytes,
+                           parsed.compress)
+  zipper.StartCompress()
+
+
+if __name__ == '__main__':
+  main(sys.argv)