Change download-baselines script to download images from skia-autogen SVN repo

I have copied the old version (which downloads the images from the buildbots
directly, but only works with our Mac buildbots) to download-baselines-old,
so we can use either version during a transition period.

Another difference: the new version sets the mimetype property of all image
files in the baseline_subdir, even those that have not changed.

BUG=386
http://code.google.com/p/skia/issues/detail?id=386 ('make buildbots write out RunGM image results to a browsable directory')
Review URL: https://codereview.appspot.com/5544056

git-svn-id: http://skia.googlecode.com/svn/trunk@3058 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/tools/download-baselines-old.py b/tools/download-baselines-old.py
new file mode 100644
index 0000000..80f866e
--- /dev/null
+++ b/tools/download-baselines-old.py
@@ -0,0 +1,188 @@
+'''
+TODO: THIS IS AN OLD VERSION OF DOWNLOAD-BASELINES THAT DOWNLOADS BASELINES
+DIRECTLY FROM THE BUILDBOT SLAVES.  ONCE THE NEW VERSION IS WORKING CORRECTLY,
+WE SHOULD DELETE THIS VERSION.
+
+Downloads the actual gm results most recently generated by the Skia buildbots,
+and adds any new ones to SVN control.
+
+This tool makes it much easier to check in new baselines, via the following
+steps:
+
+cd .../trunk
+svn update
+# make sure there are no files awaiting svn commit
+python tools/download-baselines-old.py gm/base-macmini-lion-fixed  # or other gm/ subdir
+# upload CL for review
+# validate that the new images look right
+# commit CL
+
+Launch with --help to see more options.
+
+
+Copyright 2011 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+'''
+
+# common Python modules
+import optparse
+import os
+import re
+import sys
+import urllib2
+
+# modules declared within this same directory
+import svn
+
+# Where to download recently generated baseline images for each baseline type.
+#
+# For now this only works for our Mac buildbots; our other buildbots aren't
+# uploading their results to a web server yet.
+#
+# Note also that these will currently work only within the Google corporate
+# network; that will also change soon.
+ACTUALS_BY_BASELINE_SUBDIR = {
+    'gm/base-macmini':
+        'http://172.29.92.185/b/build/slave/Skia_Mac_Float_NoDebug/gm/actual',
+    'gm/base-macmini-fixed':
+        'http://172.29.92.185/b/build/slave/Skia_Mac_Fixed_NoDebug/gm/actual',
+    'gm/base-macmini-lion-fixed':
+        'http://172.29.92.179/b/build/slave/Skia_MacMiniLion_Fixed_NoDebug/gm/actual',
+    'gm/base-macmini-lion-float':
+        'http://172.29.92.179/b/build/slave/Skia_MacMiniLion_Float_NoDebug/gm/actual',
+}
+
+USAGE_STRING = 'usage: %s [options] <baseline_subdir>'
+OPTION_IGNORE_LOCAL_MODS = '--ignore-local-mods'
+OPTION_ADD_NEW_FILES = '--add-new-files'
+
+IMAGE_REGEX = '.+\.png'  # spliced into the <a href="..."> link pattern
+IMAGE_MIMETYPE = 'image/png'  # svn mimetype property set on newly added images
+
+def GetPlatformUrl(baseline_subdir):
+    """Return URL within which the buildbots store generated baseline images,
+    as of multiple svn revisions.
+
+    Raises KeyError (listing the known subdirs) if baseline_subdir is unknown.
+
+    @param baseline_subdir key into ACTUALS_BY_BASELINE_SUBDIR (picks platform)
+    """
+    try:
+        return ACTUALS_BY_BASELINE_SUBDIR[baseline_subdir]
+    except KeyError:
+        raise KeyError(
+            'unknown baseline_subdir "%s", try one of these instead: %s' % (
+                baseline_subdir, ACTUALS_BY_BASELINE_SUBDIR.keys()))
+
+def GetLatestResultsUrl(baseline_subdir):
+    """Return URL from which we can download the MOST RECENTLY generated
+    images for this baseline type.
+
+    @param baseline_subdir key into ACTUALS_BY_BASELINE_SUBDIR (picks platform)
+    """
+    base_platform_url = GetPlatformUrl(baseline_subdir)
+    print 'base_platform_url is %s' % base_platform_url
+
+    # Use the last <a href> on the page; assumes it is the newest (TODO confirm)
+    response = urllib2.urlopen(base_platform_url)
+    html = response.read()
+    link_regex = re.compile('<a href="(.*)">')
+    links = link_regex.findall(html)
+    last_link = links[-1]
+    most_recent_result_url = '%s/%s' % (base_platform_url, last_link)
+    print 'most_recent_result_url is %s' % most_recent_result_url
+    return most_recent_result_url
+
+def DownloadMatchingFiles(source_url, filename_regex, dest_dir,
+                          only_download_updates=False):
+    """Download all files from source_url that match filename_regex, and save
+    them (with their original filenames) in dest_dir.
+
+    @param source_url URL of an HTML page whose <a href> links name the files
+    @param filename_regex only download files that match this regex
+    @param dest_dir where to save the downloaded files
+    @param only_download_updates if True, only download files that are already
+           present in dest_dir (download updated versions of those files)
+    """
+    while source_url.endswith('/'):
+        source_url = source_url[:-1]
+    response = urllib2.urlopen(source_url)
+    html = response.read()
+    link_regex = re.compile('<a href="(%s)">' % filename_regex)
+    links = link_regex.findall(html)
+    for link in links:
+        dest_path = os.path.join(dest_dir, link)
+        if only_download_updates and not os.path.isfile(dest_path):
+            continue
+        DownloadBinaryFile('%s/%s' % (source_url, link), dest_path)
+
+def DownloadBinaryFile(source_url, dest_path):
+    """Download a single file from its source_url and save it to local disk
+    at dest_path (fetched fully before dest_path is opened for writing).
+
+    @param source_url URL of the file to fetch
+    @param dest_path local path to write; overwritten if it already exists
+    """
+    print 'DownloadBinaryFile: %s -> %s' % (source_url, dest_path)
+    contents = urllib2.urlopen(source_url).read()
+    # Open dest_path only after the download succeeded, and always close it.
+    with open(dest_path, 'wb') as local_fh:
+        local_fh.write(contents)
+
+def Main(options, args):
+    """Download most recently generated baseline images for a given platform,
+    and add any new ones to SVN control.
+
+    @param options parsed options; reads ignore_local_mods and add_new_files
+    @param args positional arguments; exactly one (the baseline_subdir)
+    """
+    num_args = len(args)
+    if num_args != 1:
+        RaiseUsageException()
+
+    baseline_subdir = args[0]
+    while baseline_subdir.endswith('/'):
+        baseline_subdir = baseline_subdir[:-1]
+    svn_handler = svn.Svn(baseline_subdir)
+
+    # If there are any locally modified files in that directory, exit
+    # (so that we don't risk overwriting the user's previous work).
+    new_and_modified_files = svn_handler.GetNewAndModifiedFiles()
+    if not options.ignore_local_mods:
+        if new_and_modified_files:
+            raise Exception('Exiting because there are already new and/or '
+                            'modified files in %s.  To continue in spite of '
+                            'that, run with %s option.' % (
+                                baseline_subdir, OPTION_IGNORE_LOCAL_MODS))
+
+    # Download the actual results from the appropriate buildbot.
+    results_url = GetLatestResultsUrl(baseline_subdir)
+    DownloadMatchingFiles(source_url=results_url, filename_regex=IMAGE_REGEX,
+                          dest_dir=baseline_subdir,
+                          only_download_updates=(not options.add_new_files))
+
+    # Add any new files to SVN control (if we are running with add_new_files).
+    new_files = svn_handler.GetNewFiles()
+    if new_files and options.add_new_files:
+        svn_handler.AddFiles(new_files)
+        svn_handler.SetProperty(new_files, svn.PROPERTY_MIMETYPE,
+                                IMAGE_MIMETYPE)
+
+def RaiseUsageException():  # raises Exception carrying the usage string
+    raise Exception(USAGE_STRING %  __file__)
+
+if __name__ == '__main__':  # script entry: parse the flags, then run Main
+    parser = optparse.OptionParser(USAGE_STRING % '%prog')
+    parser.add_option(OPTION_IGNORE_LOCAL_MODS,
+                      action='store_true', default=False,
+                      help='allow tool to run even if there are already '
+                      'local modifications in the baseline_subdir')
+    parser.add_option(OPTION_ADD_NEW_FILES,
+                      action='store_true', default=False,
+                      help='in addition to downloading new versions of '
+                      'existing baselines, also download baselines that are '
+                      'not under SVN control yet')
+    (options, args) = parser.parse_args()
+    Main(options, args)
diff --git a/tools/download-baselines.py b/tools/download-baselines.py
index d41b905..11417be 100644
--- a/tools/download-baselines.py
+++ b/tools/download-baselines.py
@@ -23,109 +23,57 @@
 '''
 
 # common Python modules
+import fnmatch
 import optparse
 import os
 import re
+import shutil
 import sys
-import urllib2
+import tempfile
 
 # modules declared within this same directory
 import svn
 
-# Where to download recently generated baseline images for each baseline type.
-#
-# For now this only works for our Mac buildbots; our other buildbots aren't
-# uploading their results to a web server yet.
-#
-# Note also that these will currently work only within the Google corporate
-# network; that will also change soon.
-ACTUALS_BY_BASELINE_SUBDIR = {
-    'gm/base-macmini':
-        'http://172.29.92.185/b/build/slave/Skia_Mac_Float_NoDebug/gm/actual',
-    'gm/base-macmini-fixed':
-        'http://172.29.92.185/b/build/slave/Skia_Mac_Fixed_NoDebug/gm/actual',
-    'gm/base-macmini-lion-fixed':
-        'http://172.29.92.179/b/build/slave/Skia_MacMiniLion_Fixed_NoDebug/gm/actual',
-    'gm/base-macmini-lion-float':
-        'http://172.29.92.179/b/build/slave/Skia_MacMiniLion_Float_NoDebug/gm/actual',
-}
+# Base URL of SVN repository where buildbots store actual gm image results.
+SVN_BASE_URL = 'http://skia-autogen.googlecode.com/svn/gm-actual'
 
 USAGE_STRING = 'usage: %s [options] <baseline_subdir>'
 OPTION_IGNORE_LOCAL_MODS = '--ignore-local-mods'
 OPTION_ADD_NEW_FILES = '--add-new-files'
 
-IMAGE_REGEX = '.+\.png'
-IMAGE_MIMETYPE = 'image/png'
-
-def GetPlatformUrl(baseline_subdir):
-    """Return URL within which the buildbots store generated baseline images,
-    as of multiple svn revisions.
-
-    Raises KeyError if we don't have a URL matching this baseline_subdir.
-
-    @param baseline_subdir indicates which platform we want images for
-    """
-    try:
-        return ACTUALS_BY_BASELINE_SUBDIR[baseline_subdir]
-    except KeyError:
-        raise KeyError(
-            'unknown baseline_subdir "%s", try one of these instead: %s' % (
-                baseline_subdir, ACTUALS_BY_BASELINE_SUBDIR.keys()))
-
-def GetLatestResultsUrl(baseline_subdir):
-    """Return URL from which we can download the MOST RECENTLY generated
+def GetLatestResultsSvnUrl(baseline_subdir):
+    """Return SVN URL from which we can check out the MOST RECENTLY generated
     images for this baseline type.
 
     @param baseline_subdir indicates which platform we want images for
     """
-    base_platform_url = GetPlatformUrl(baseline_subdir)
-    print 'base_platform_url is %s' % base_platform_url
+    # trim off 'gm/' prefix (built with os.sep, so it is 'gm\' on Windows)
+    gm_prefix = 'gm%s' % os.sep
+    if not baseline_subdir.startswith(gm_prefix):
+        raise Exception('baseline_subdir "%s" should start with "%s"' % (
+            baseline_subdir, gm_prefix))
+    return '%s/%s' % (SVN_BASE_URL, baseline_subdir[len(gm_prefix):])
 
-    # Find the most recently generated baseline images within base_platform_url
-    response = urllib2.urlopen(base_platform_url)
-    html = response.read()
-    link_regex = re.compile('<a href="(.*)">')
-    links = link_regex.findall(html)
-    last_link = links[-1]
-    most_recent_result_url = '%s/%s' % (base_platform_url, last_link)
-    print 'most_recent_result_url is %s' % most_recent_result_url
-    return most_recent_result_url
+def CopyMatchingFiles(source_dir, dest_dir, filename_pattern,
+                      only_copy_updates=False):
+    """Copy all files from source_dir that match filename_pattern, and
+    save them (with their original filenames) in dest_dir.
 
-def DownloadMatchingFiles(source_url, filename_regex, dest_dir,
-                          only_download_updates=False):
-    """Download all files from source_url that match filename_regex, and save
-    them (with their original filenames) in dest_dir.
-
-    @param source_url
-    @param filename_regex only download files that match this regex
-    @param dest_dir where to save the downloaded files
-    @param only_download_updates if True, only download files that are already
-           present in dest_dir (download updated versions of those files)
+    @param source_dir directory whose direct entries are candidates to copy
+    @param dest_dir where to save the copied files
+    @param filename_pattern only copy files that match this Unix-style filename
+           pattern (e.g., '*.jpg')
+    @param only_copy_updates if True, only copy files that are already
+           present in dest_dir
     """
-    while source_url.endswith('/'):
-        source_url = source_url[:-1]
-    response = urllib2.urlopen(source_url)
-    html = response.read()
-    link_regex = re.compile('<a href="(%s)">' % filename_regex)
-    links = link_regex.findall(html)
-    for link in links:
-        dest_path = os.path.join(dest_dir, link)
-        if only_download_updates and not os.path.isfile(dest_path):
+    all_filenames = os.listdir(source_dir)
+    matching_filenames = fnmatch.filter(all_filenames, filename_pattern)
+    for filename in matching_filenames:
+        source_path = os.path.join(source_dir, filename)
+        dest_path = os.path.join(dest_dir, filename)
+        if only_copy_updates and not os.path.isfile(dest_path):
             continue
-        DownloadBinaryFile('%s/%s' % (source_url, link), dest_path)
-
-def DownloadBinaryFile(source_url, dest_path):
-    """Download a single file from its source_url and save it to local disk
-    at dest_path.
-
-    @param source_url
-    @param dest_path
-    """
-    print 'DownloadBinaryFile: %s -> %s' % (source_url, dest_path)
-    url_fh = urllib2.urlopen(source_url)
-    local_fh = open(dest_path, 'wb')
-    local_fh.write(url_fh.read())
-    local_fh.close()
+        shutil.copyfile(source_path, dest_path)
 
 def Main(options, args):
     """Download most recently generated baseline images for a given platform,
@@ -138,14 +86,15 @@
     if num_args != 1:
         RaiseUsageException()
 
-    baseline_subdir = args[0]
-    while baseline_subdir.endswith('/'):
-        baseline_subdir = baseline_subdir[:-1]
-    svn_handler = svn.Svn(baseline_subdir)
+    # Create repo_to_modify to handle the SVN repository we will add files to.
+    baseline_subdir = args[0].rstrip(os.sep);
+    if not os.path.isdir(baseline_subdir):
+        raise Exception('could not find baseline_subdir "%s"' % baseline_subdir)
+    repo_to_modify = svn.Svn(baseline_subdir)
 
     # If there are any locally modified files in that directory, exit
     # (so that we don't risk overwriting the user's previous work).
-    new_and_modified_files = svn_handler.GetNewAndModifiedFiles()
+    new_and_modified_files = repo_to_modify.GetNewAndModifiedFiles()
     if not options.ignore_local_mods:
         if new_and_modified_files:
             raise Exception('Exiting because there are already new and/or '
@@ -153,18 +102,31 @@
                             'that, run with %s option.' % (
                                 baseline_subdir, OPTION_IGNORE_LOCAL_MODS))
 
-    # Download the actual results from the appropriate buildbot.
-    results_url = GetLatestResultsUrl(baseline_subdir)
-    DownloadMatchingFiles(source_url=results_url, filename_regex=IMAGE_REGEX,
-                          dest_dir=baseline_subdir,
-                          only_download_updates=(not options.add_new_files))
+    # Download actual gm images into a separate repo in a temporary directory.
+    tempdir = tempfile.mkdtemp()
+    download_repo = svn.Svn(tempdir)
+    download_repo.Checkout(GetLatestResultsSvnUrl(baseline_subdir), '.')
+
+    # Copy any of those files we are interested in into repo_to_modify,
+    # and then delete the temporary directory.
+    CopyMatchingFiles(source_dir=tempdir, dest_dir=baseline_subdir,
+                      filename_pattern='*.png',
+                      only_copy_updates=(not options.add_new_files))
+    shutil.rmtree(tempdir)
+    download_repo = None
 
     # Add any new files to SVN control (if we are running with add_new_files).
-    new_files = svn_handler.GetNewFiles()
+    new_files = repo_to_modify.GetNewFiles()
     if new_files and options.add_new_files:
-        svn_handler.AddFiles(new_files)
-        svn_handler.SetProperty(new_files, svn.PROPERTY_MIMETYPE,
-                                IMAGE_MIMETYPE)
+        repo_to_modify.AddFiles(new_files)
+
+    # Set the mimetype property on *all* image files in baseline_subdir, even
+    # the ones that were already there (in case that property wasn't properly
+    # set already).
+    repo_to_modify.SetPropertyByFilenamePattern(
+        '*.png', svn.PROPERTY_MIMETYPE, 'image/png')
+    repo_to_modify.SetPropertyByFilenamePattern(
+        '*.pdf', svn.PROPERTY_MIMETYPE, 'application/pdf')
 
 def RaiseUsageException():
     raise Exception(USAGE_STRING %  __file__)
diff --git a/tools/svn.py b/tools/svn.py
index ab811aa..b2f010c 100644
--- a/tools/svn.py
+++ b/tools/svn.py
@@ -5,6 +5,8 @@
 found in the LICENSE file.
 '''
 
+import fnmatch
+import os
 import re
 import subprocess
 
@@ -25,15 +27,25 @@
 
         @param args a list of arguments
         """
+        print 'RunCommand: %s' % args
         proc = subprocess.Popen(args, cwd=self._directory,
-                                stdout=subprocess.PIPE)
-        stdout = proc.communicate()[0]
-        returncode = proc.returncode
-        if returncode is not 0:
-            raise Exception('command "%s" failed in dir "%s": returncode=%s' %
-                            (args, self._directory, returncode))
+                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        (stdout, stderr) = proc.communicate()
+        if proc.returncode is not 0:
+            raise Exception('command "%s" failed in dir "%s": %s' %
+                            (args, self._directory, stderr))
         return stdout
 
+    def Checkout(self, url, path):
+        """Check out a working copy from a repository.
+        Returns svn's stdout as a single string; raises Exception if svn fails.
+
+        @param url URL from which to check out the working copy
+        @param path path (within self._directory) where the local copy will be
+        written
+        """
+        return self._RunCommand(['svn', 'checkout', url, path])
+
     def GetNewFiles(self):
         """Return a list of files which are in this directory but NOT under
         SVN control.
@@ -57,10 +69,7 @@
 
         @param filenames files to add to SVN control
         """
-        args = ['svn', 'add']
-        args.extend(filenames)
-        print '\n\nAddFiles: %s' % args
-        print self._RunCommand(args)
+        self._RunCommand(['svn', 'add'] + filenames)
 
     def SetProperty(self, filenames, property_name, property_value):
         """Sets a svn property for these files.
@@ -69,7 +78,19 @@
         @param property_name property_name to set for each file
         @param property_value what to set the property_name to
         """
-        args = ['svn', 'propset', property_name, property_value]
-        args.extend(filenames)
-        print '\n\nSetProperty: %s' % args
-        print self._RunCommand(args)
+        if filenames:
+            self._RunCommand(
+                ['svn', 'propset', property_name, property_value] + filenames)
+
+    def SetPropertyByFilenamePattern(self, filename_pattern,
+                                     property_name, property_value):
+        """Sets a svn property for all files matching filename_pattern.
+
+        @param filename_pattern Unix-style pattern (e.g., '*.jpg'); matched
+               against the names directly in self._directory (non-recursive)
+        @param property_name property_name to set for each file
+        @param property_value what to set the property_name to
+        """
+        all_files = os.listdir(self._directory)
+        matching_files = fnmatch.filter(all_files, filename_pattern)
+        self.SetProperty(matching_files, property_name, property_value)