rebaseline_server: download actual-results.json files from GCS instead of SVN

BUG=skia:553
R=borenet@google.com

Author: epoger@google.com

Review URL: https://codereview.chromium.org/310093003
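
Instead of syncing an SVN checkout of actual-results.json files, the server
now pulls each builder's JSON summary straight from Google Storage via the
new helpers in tools/pyutils.  A minimal sketch of the new download flow
(the bucket name below is a placeholder; the real one comes from
buildbot_globals.Get('gm_summaries_bucket'), and the sketch assumes tools/
is on PYTHONPATH, as fix_pythonpath.py arranges):

  import os
  import posixpath
  from pyutils import gs_utils

  bucket = 'example-gm-summaries-bucket'  # placeholder bucket name
  # Each top-level "directory" in the bucket is a builder name.
  builders, _ = gs_utils.list_bucket_contents(bucket=bucket)
  for builder in builders:
    # Fetch that builder's actual-results.json summary to local disk.
    gs_utils.download_file(
        source_bucket=bucket,
        source_path=posixpath.join(builder, 'actual-results.json'),
        dest_path=os.path.join('.gm-actuals', builder, 'actual-results.json'),
        create_subdirs_if_needed=True)
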
diff --git a/gm/rebaseline_server/compare_configs.py b/gm/rebaseline_server/compare_configs.py
index aa26ba6..4075da4 100755
--- a/gm/rebaseline_server/compare_configs.py
+++ b/gm/rebaseline_server/compare_configs.py
@@ -14,28 +14,12 @@
 import fnmatch
 import json
 import logging
-import os
 import re
-import sys
 import time
 
 # Imports from within Skia
-#
-# TODO(epoger): Once we move the create_filepath_url() function out of
-# download_actuals into a shared utility module, we won't need to import
-# download_actuals anymore.
-#
-# We need to add the 'gm' directory, so that we can import gm_json.py within
-# that directory.  That script allows us to parse the actual-results.json file
-# written out by the GM tool.
-# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
-GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY)
-TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY)
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
-import download_actuals
+import fix_pythonpath  # must do this first
+from pyutils import url_utils
 import gm_json
 import imagediffdb
 import imagepair
@@ -71,7 +55,7 @@
     self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root)
     self._diff_base_url = (
         diff_base_url or
-        download_actuals.create_filepath_url(generated_images_root))
+        url_utils.create_filepath_url(generated_images_root))
     self._actuals_root = actuals_root
     self._load_config_pairs(configs)
     self._timestamp = int(time.time())
diff --git a/gm/rebaseline_server/compare_rendered_pictures.py b/gm/rebaseline_server/compare_rendered_pictures.py
index 75a80d4..73d0627 100755
--- a/gm/rebaseline_server/compare_rendered_pictures.py
+++ b/gm/rebaseline_server/compare_rendered_pictures.py
@@ -13,26 +13,11 @@
 import logging
 import os
 import re
-import sys
 import time
 
 # Imports from within Skia
-#
-# TODO(epoger): Once we move the create_filepath_url() function out of
-# download_actuals into a shared utility module, we won't need to import
-# download_actuals anymore.
-#
-# We need to add the 'gm' directory, so that we can import gm_json.py within
-# that directory.  That script allows us to parse the actual-results.json file
-# written out by the GM tool.
-# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
-GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY)
-TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY)
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
-import download_actuals
+import fix_pythonpath  # must do this first
+from pyutils import url_utils
 import gm_json
 import imagediffdb
 import imagepair
@@ -74,7 +59,7 @@
     self._image_base_url = image_base_url
     self._diff_base_url = (
         diff_base_url or
-        download_actuals.create_filepath_url(generated_images_root))
+        url_utils.create_filepath_url(generated_images_root))
     self._load_result_pairs(actuals_root, subdirs)
     self._timestamp = int(time.time())
     logging.info('Results complete; took %d seconds.' %
diff --git a/gm/rebaseline_server/compare_to_expectations.py b/gm/rebaseline_server/compare_to_expectations.py
index e9677bd..1a93c66 100755
--- a/gm/rebaseline_server/compare_to_expectations.py
+++ b/gm/rebaseline_server/compare_to_expectations.py
@@ -20,22 +20,8 @@
 import time
 
 # Imports from within Skia
-#
-# TODO(epoger): Once we move the create_filepath_url() function out of
-# download_actuals into a shared utility module, we won't need to import
-# download_actuals anymore.
-#
-# We need to add the 'gm' directory, so that we can import gm_json.py within
-# that directory.  That script allows us to parse the actual-results.json file
-# written out by the GM tool.
-# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
-GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY)
-TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY)
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
-import download_actuals
+import fix_pythonpath  # must do this first
+from pyutils import url_utils
 import gm_json
 import imagediffdb
 import imagepair
@@ -47,6 +33,7 @@
     results.KEY__EXPECTATIONS__IGNOREFAILURE,
     results.KEY__EXPECTATIONS__REVIEWED,
 ]
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
 DEFAULT_EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm')
 DEFAULT_IGNORE_FAILURES_FILE = 'ignored-tests.txt'
 
@@ -88,7 +75,7 @@
     self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root)
     self._diff_base_url = (
         diff_base_url or
-        download_actuals.create_filepath_url(generated_images_root))
+        url_utils.create_filepath_url(generated_images_root))
     self._actuals_root = actuals_root
     self._expected_root = expected_root
     self._ignore_failures_on_these_tests = []
diff --git a/gm/rebaseline_server/download_actuals.py b/gm/rebaseline_server/download_actuals.py
index 636958b..2f92898 100755
--- a/gm/rebaseline_server/download_actuals.py
+++ b/gm/rebaseline_server/download_actuals.py
@@ -10,44 +10,19 @@
 """
 
 # System-level imports
-import contextlib
 import optparse
 import os
 import posixpath
 import re
-import shutil
-import sys
-import urllib
 import urllib2
-import urlparse
 
 # Imports from within Skia
-#
-# We need to add the 'gm' and 'tools' directories, so that we can import
-# gm_json.py and buildbot_globals.py.
-#
-# Make sure that these dirs are in the PYTHONPATH, but add them at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-#
-# TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that
-# the tools dir is dependent on the 'gm' dir (to import gm_json.py)?
-TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
-GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm')
-TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
-if TOOLS_DIRECTORY not in sys.path:
-  sys.path.append(TOOLS_DIRECTORY)
+import fix_pythonpath  # must do this first
+from pyutils import gs_utils
+from pyutils import url_utils
 import buildbot_globals
 import gm_json
 
-# Imports from third-party code
-APICLIENT_DIRECTORY = os.path.join(
-    TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client')
-if APICLIENT_DIRECTORY not in sys.path:
-  sys.path.append(APICLIENT_DIRECTORY)
-from googleapiclient.discovery import build as build_service
-
 
 GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket')
 DEFAULT_ACTUALS_BASE_URL = (
@@ -105,98 +80,19 @@
             test_name=test, hash_type=hash_type, hash_digest=hash_digest,
             gm_actuals_root_url=self._gm_actuals_root_url)
         dest_path = os.path.join(dest_dir, config, test + '.png')
-        # TODO(epoger): To speed this up, we should only download files that
-        # we don't already have on local disk.
-        copy_contents(source_url=source_url, dest_path=dest_path,
-                      create_subdirs_if_needed=True)
+        url_utils.copy_contents(source_url=source_url, dest_path=dest_path,
+                                create_subdirs_if_needed=True)
 
 
-def create_filepath_url(filepath):
-  """ Returns a file:/// URL pointing at the given filepath on local disk.
-
-  For now, this is only used by unittests, but I anticipate it being useful
-  in production, as a way for developers to run rebaseline_server over locally
-  generated images.
-
-  TODO(epoger): Move this function, and copy_contents(), into a shared
-  utility module.  They are generally useful.
+def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET):
+  """ Returns the list of builders we have actual results for.
 
   Args:
-    filepath: string; path to a file on local disk (may be absolute or relative,
-        and the file does not need to exist)
-
-  Returns:
-    A file:/// URL pointing at the file.  Regardless of whether filepath was
-        specified as a relative or absolute path, the URL will contain an
-        absolute path to the file.
-
-  Raises:
-    An Exception, if filepath is already a URL.
+    summaries_bucket: Google Cloud Storage bucket containing the summary
+        JSON files
   """
-  if urlparse.urlparse(filepath).scheme:
-    raise Exception('"%s" is already a URL' % filepath)
-  return urlparse.urljoin(
-      'file:', urllib.pathname2url(os.path.abspath(filepath)))
-
-
-def copy_contents(source_url, dest_path, create_subdirs_if_needed=False):
-  """ Copies the full contents of the URL 'source_url' into
-  filepath 'dest_path'.
-
-  Args:
-    source_url: string; complete URL to read from
-    dest_path: string; complete filepath to write to (may be absolute or
-        relative)
-    create_subdirs_if_needed: boolean; whether to create subdirectories as
-        needed to create dest_path
-
-  Raises:
-    Some subclass of Exception if unable to read source_url or write dest_path.
-  """
-  if create_subdirs_if_needed:
-    dest_dir = os.path.dirname(dest_path)
-    if not os.path.exists(dest_dir):
-      os.makedirs(dest_dir)
-  with contextlib.closing(urllib.urlopen(source_url)) as source_handle:
-    with open(dest_path, 'wb') as dest_handle:
-      shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle)
-
-
-def gcs_list_bucket_contents(bucket, subdir=None):
-  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
-
-  Uses the API documented at
-  https://developers.google.com/storage/docs/json_api/v1/objects/list
-
-  Args:
-    bucket: name of the Google Storage bucket
-    subdir: directory within the bucket to list, or None for root directory
-  """
-  # The GCS command relies on the subdir name (if any) ending with a slash.
-  if subdir and not subdir.endswith('/'):
-    subdir += '/'
-  subdir_length = len(subdir) if subdir else 0
-
-  storage = build_service('storage', 'v1')
-  command = storage.objects().list(
-      bucket=bucket, delimiter='/', fields='items(name),prefixes',
-      prefix=subdir)
-  results = command.execute()
-
-  # The GCS command returned two subdicts:
-  # prefixes: the full path of every directory within subdir, with trailing '/'
-  # items: property dict for each file object within subdir
-  #        (including 'name', which is full path of the object)
-  dirs = []
-  for dir_fullpath in results.get('prefixes', []):
-    dir_basename = dir_fullpath[subdir_length:]
-    dirs.append(dir_basename[:-1])  # strip trailing slash
-  files = []
-  for file_properties in results.get('items', []):
-    file_fullpath = file_properties['name']
-    file_basename = file_fullpath[subdir_length:]
-    files.append(file_basename)
-  return (dirs, files)
+  dirs, _ = gs_utils.list_bucket_contents(bucket=summaries_bucket)
+  return dirs
 
 
 def main():
@@ -234,8 +130,7 @@
   (params, remaining_args) = parser.parse_args()
 
   if params.list_builders:
-    dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET)
-    print '\n'.join(dirs)
+    print '\n'.join(get_builders_list())
     return
 
   # Make sure all required options were set,
diff --git a/gm/rebaseline_server/download_actuals_test.py b/gm/rebaseline_server/download_actuals_test.py
index 8813530..c405a3c 100755
--- a/gm/rebaseline_server/download_actuals_test.py
+++ b/gm/rebaseline_server/download_actuals_test.py
@@ -25,6 +25,8 @@
 import urllib
 
 # Imports from within Skia
+import fix_pythonpath  # must do this first
+from pyutils import url_utils
 import base_unittest
 import download_actuals
 
@@ -34,52 +36,14 @@
   def test_fetch(self):
     """Tests fetch() of GM results from actual-results.json ."""
     downloader = download_actuals.Download(
-        actuals_base_url=download_actuals.create_filepath_url(
+        actuals_base_url=url_utils.create_filepath_url(
             os.path.join(self._input_dir, 'gm-actuals')),
-        gm_actuals_root_url=download_actuals.create_filepath_url(
+        gm_actuals_root_url=url_utils.create_filepath_url(
             os.path.join(self._input_dir, 'fake-gm-imagefiles')))
     downloader.fetch(
         builder_name='Test-Android-GalaxyNexus-SGX540-Arm7-Release',
         dest_dir=self._output_dir_actual)
 
-  def test_create_filepath_url(self):
-    """Tests create_filepath_url(). """
-    with self.assertRaises(Exception):
-      url_or_path.create_filepath_url('http://1.2.3.4/path')
-    # Pass absolute filepath.
-    self.assertEquals(
-        download_actuals.create_filepath_url(
-            '%sdir%sfile' % (os.path.sep, os.path.sep)),
-        'file:///dir/file')
-    # Pass relative filepath.
-    self.assertEquals(
-        download_actuals.create_filepath_url(os.path.join('dir', 'file')),
-        'file://%s/dir/file' % urllib.pathname2url(os.getcwd()))
-
-  def test_copy_contents(self):
-    """Tests copy_contents(). """
-    contents = 'these are the contents'
-    tempdir_path = tempfile.mkdtemp()
-    try:
-      source_path = os.path.join(tempdir_path, 'source')
-      source_url = download_actuals.create_filepath_url(source_path)
-      with open(source_path, 'w') as source_handle:
-        source_handle.write(contents)
-      dest_path = os.path.join(tempdir_path, 'new_subdir', 'dest')
-      # Destination subdir does not exist, so copy_contents() should fail
-      # if create_subdirs_if_needed is False.
-      with self.assertRaises(Exception):
-        download_actuals.copy_contents(source_url=source_url,
-                                       dest_path=dest_path,
-                                       create_subdirs_if_needed=False)
-      # If create_subdirs_if_needed is True, it should work.
-      download_actuals.copy_contents(source_url=source_url,
-                                     dest_path=dest_path,
-                                     create_subdirs_if_needed=True)
-      self.assertEquals(open(dest_path).read(), contents)
-    finally:
-      shutil.rmtree(tempdir_path)
-
 
 def main():
   base_unittest.main(DownloadTest)
diff --git a/gm/rebaseline_server/fix_pythonpath.py b/gm/rebaseline_server/fix_pythonpath.py
new file mode 100755
index 0000000..ed578ce
--- /dev/null
+++ b/gm/rebaseline_server/fix_pythonpath.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Adds [trunk]/gm and [trunk]/tools to PYTHONPATH, if they aren't already there.
+"""
+
+import os
+import sys
+
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm')
+TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
+if GM_DIRECTORY not in sys.path:
+  sys.path.append(GM_DIRECTORY)
+if TOOLS_DIRECTORY not in sys.path:
+  sys.path.append(TOOLS_DIRECTORY)
diff --git a/gm/rebaseline_server/results.py b/gm/rebaseline_server/results.py
index 70b2342..d17bc3d 100755
--- a/gm/rebaseline_server/results.py
+++ b/gm/rebaseline_server/results.py
@@ -13,19 +13,9 @@
 import fnmatch
 import os
 import re
-import sys
 
 # Imports from within Skia
-#
-# We need to add the 'gm' directory, so that we can import gm_json.py within
-# that directory.  That script allows us to parse the actual-results.json file
-# written out by the GM tool.
-# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
-GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY)
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
+import fix_pythonpath  # must do this first
 import gm_json
 import imagepairset
 
@@ -57,6 +47,7 @@
 IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
 IMAGE_FILENAME_FORMATTER = '%s_%s.png'  # pass in (testname, config)
 
+PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
 DEFAULT_ACTUALS_DIR = '.gm-actuals'
 DEFAULT_GENERATED_IMAGES_ROOT = os.path.join(
     PARENT_DIRECTORY, '.generated-images')
diff --git a/gm/rebaseline_server/server.py b/gm/rebaseline_server/server.py
index 0680779..0079ec5 100755
--- a/gm/rebaseline_server/server.py
+++ b/gm/rebaseline_server/server.py
@@ -20,28 +20,14 @@
 import shutil
 import socket
 import subprocess
-import sys
 import thread
 import threading
 import time
 import urlparse
 
 # Imports from within Skia
-#
-# We need to add the 'tools' directory for svn.py, and the 'gm' directory for
-# gm_json.py .
-# that directory.
-# Make sure that the 'tools' dir is in the PYTHONPATH, but add it at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
-GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY)
-TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY)
-TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
-if TOOLS_DIRECTORY not in sys.path:
-  sys.path.append(TOOLS_DIRECTORY)
-import svn
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
+import fix_pythonpath  # must do this first
+from pyutils import gs_utils
 import gm_json
 
 # Imports from local dir
@@ -51,6 +37,7 @@
 # https://codereview.chromium.org/195943004/diff/1/gm/rebaseline_server/server.py#newcode44
 import compare_configs
 import compare_to_expectations
+import download_actuals
 import imagepairset
 import results as results_mod
 
@@ -74,10 +61,12 @@
 KEY__EDITS__OLD_RESULTS_TYPE = 'oldResultsType'
 
 DEFAULT_ACTUALS_DIR = results_mod.DEFAULT_ACTUALS_DIR
-DEFAULT_ACTUALS_REPO_REVISION = 'HEAD'
-DEFAULT_ACTUALS_REPO_URL = 'http://skia-autogen.googlecode.com/svn/gm-actual'
+DEFAULT_GM_SUMMARIES_BUCKET = download_actuals.GM_SUMMARIES_BUCKET
+DEFAULT_JSON_FILENAME = download_actuals.DEFAULT_JSON_FILENAME
 DEFAULT_PORT = 8888
 
+PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(PARENT_DIRECTORY))
 # Directory, relative to PARENT_DIRECTORY, within which the server will serve
 # out live results (not static files).
 RESULTS_SUBDIR = 'results'
@@ -139,24 +128,6 @@
   return host
 
 
-def _create_svn_checkout(dir_path, repo_url):
-  """Creates local checkout of an SVN repository at the specified directory
-  path, returning an svn.Svn object referring to the local checkout.
-
-  Args:
-    dir_path: path to the local checkout; if this directory does not yet exist,
-              it will be created and the repo will be checked out into it
-    repo_url: URL of SVN repo to check out into dir_path (unless the local
-              checkout already exists)
-  Returns: an svn.Svn object referring to the local checkout.
-  """
-  local_checkout = svn.Svn(dir_path)
-  if not os.path.isdir(dir_path):
-    os.makedirs(dir_path)
-    local_checkout.Checkout(repo_url, '.')
-  return local_checkout
-
-
 def _create_index(file_path, config_pairs):
   """Creates an index file linking to all results available from this server.
 
@@ -213,18 +184,18 @@
 
   def __init__(self,
                actuals_dir=DEFAULT_ACTUALS_DIR,
-               actuals_repo_revision=DEFAULT_ACTUALS_REPO_REVISION,
-               actuals_repo_url=DEFAULT_ACTUALS_REPO_URL,
+               json_filename=DEFAULT_JSON_FILENAME,
+               gm_summaries_bucket=DEFAULT_GM_SUMMARIES_BUCKET,
                port=DEFAULT_PORT, export=False, editable=True,
                reload_seconds=0, config_pairs=None, builder_regex_list=None):
     """
     Args:
       actuals_dir: directory under which we will check out the latest actual
           GM results
-      actuals_repo_revision: revision of actual-results.json files to process
-      actuals_repo_url: SVN repo to download actual-results.json files from;
-          if None or '', don't fetch new actual-results files at all,
-          just compare to whatever files are already in actuals_dir
+      json_filename: basename of the JSON summary file to load for each builder
+      gm_summaries_bucket: Google Storage bucket to download json_filename
+          files from; if None or '', don't fetch new actual-results files
+          at all, just compare to whatever files are already in actuals_dir
       port: which TCP port to listen on for HTTP requests
       export: whether to allow HTTP clients on other hosts to access this server
       editable: whether HTTP clients are allowed to submit new baselines
@@ -237,8 +208,8 @@
           we will process. If None, process all builders.
     """
     self._actuals_dir = actuals_dir
-    self._actuals_repo_revision = actuals_repo_revision
-    self._actuals_repo_url = actuals_repo_url
+    self._json_filename = json_filename
+    self._gm_summaries_bucket = gm_summaries_bucket
     self._port = port
     self._export = export
     self._editable = editable
@@ -250,11 +221,6 @@
             PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR, GENERATED_HTML_SUBDIR,
             "index.html"),
         config_pairs=config_pairs)
-    # TODO(epoger): Create shareable functions within download_actuals.py that
-    # we can use both there and here to download the actual image results.
-    if actuals_repo_url:
-      self._actuals_repo = _create_svn_checkout(
-          dir_path=actuals_dir, repo_url=actuals_repo_url)
 
     # Reentrant lock that must be held whenever updating EITHER of:
     # 1. self._results
@@ -302,26 +268,66 @@
     with self.results_rlock:
       if invalidate:
         self._results = None
-      if self._actuals_repo_url:
+      if self._gm_summaries_bucket:
         logging.info(
-            'Updating actual GM results in %s to revision %s from repo %s ...'
-            % (
-                self._actuals_dir, self._actuals_repo_revision,
-                self._actuals_repo_url))
-        self._actuals_repo.Update(
-            path='.', revision=self._actuals_repo_revision)
+            'Updating GM result summaries in %s from gm_summaries_bucket %s ...'
+            % (self._actuals_dir, self._gm_summaries_bucket))
+
+        # Clean out actuals_dir first, in case some builders have gone away
+        # since we last ran.
+        if os.path.isdir(self._actuals_dir):
+          shutil.rmtree(self._actuals_dir)
+
+        # Get the list of builders we care about.
+        all_builders = download_actuals.get_builders_list(
+            summaries_bucket=self._gm_summaries_bucket)
+        if self._builder_regex_list:
+          matching_builders = []
+          for builder in all_builders:
+            for regex in self._builder_regex_list:
+              if re.match(regex, builder):
+                matching_builders.append(builder)
+                break  # go on to the next builder, no need to try more regexes
+        else:
+          matching_builders = all_builders
+
+        # Download the JSON file for each builder we care about.
+        #
+        # TODO(epoger): When there are many builders, we would be better off
+        # downloading these files in parallel!
+        for builder in matching_builders:
+          gs_utils.download_file(
+              source_bucket=self._gm_summaries_bucket,
+              source_path=posixpath.join(builder, self._json_filename),
+              dest_path=os.path.join(self._actuals_dir, builder,
+                                     self._json_filename),
+              create_subdirs_if_needed=True)
 
       # We only update the expectations dir if the server was run with a
       # nonzero --reload argument; otherwise, we expect the user to maintain
       # her own expectations as she sees fit.
       #
-      # Because the Skia repo is moving from SVN to git, and git does not
+      # Because the Skia repo is hosted using git, and git does not
       # support updating a single directory tree, we have to update the entire
       # repo checkout.
       #
       # Because Skia uses depot_tools, we have to update using "gclient sync"
-      # instead of raw git (or SVN) update.  Happily, this will work whether
-      # the checkout was created using git or SVN.
+      # instead of raw git commands.
+      #
+      # TODO(epoger): Fetch latest expectations in some other way.
+      # Eric points out that our official documentation recommends an
+      # unmanaged Skia checkout, so "gclient sync" will not bring down updated
+      # expectations from origin/master; you'd have to do a "git pull" of
+      # some sort instead.
+      # However, the live rebaseline_server at
+      # http://skia-tree-status.appspot.com/redirect/rebaseline-server (which
+      # is probably the only user of the --reload flag!) uses a managed
+      # checkout, so "gclient sync" works in that case.
+      # Probably the best idea is to avoid all of this nonsense by fetching
+      # updated expectations into a temp directory, and leaving the rest of
+      # the checkout alone.  This could be done using "git show", or by
+      # downloading individual expectation JSON files from
+      # skia.googlesource.com .
       if self._reload_seconds:
         logging.info(
             'Updating expected GM results in %s by syncing Skia repo ...' %
@@ -623,18 +629,11 @@
                           'actual GM results. If this directory does not '
                           'exist, it will be created. Defaults to %(default)s'),
                     default=DEFAULT_ACTUALS_DIR)
-  parser.add_argument('--actuals-repo',
-                    help=('URL of SVN repo to download actual-results.json '
-                          'files from. Defaults to %(default)s ; if set to '
-                          'empty string, just compare to actual-results '
-                          'already found in ACTUALS_DIR.'),
-                    default=DEFAULT_ACTUALS_REPO_URL)
-  parser.add_argument('--actuals-revision',
-                    help=('revision of actual-results.json files to process. '
-                          'Defaults to %(default)s .  Beware of setting this '
-                          'argument in conjunction with --editable; you '
-                          'probably only want to edit results at HEAD.'),
-                    default=DEFAULT_ACTUALS_REPO_REVISION)
+  # TODO(epoger): Before https://codereview.chromium.org/310093003 ,
+  # this tool downloaded the JSON summaries from skia-autogen and offered
+  # an --actuals-revision flag the caller could use to download actual
+  # results as of a specific point in time.  We should add similar
+  # functionality when retrieving the summaries from Google Storage.
   parser.add_argument('--builders', metavar='BUILDER_REGEX', nargs='+',
                       help=('Only process builders matching these regular '
                             'expressions.  If unspecified, process all '
@@ -652,6 +651,17 @@
                             'to access this server.  WARNING: doing so will '
                             'allow users on other hosts to modify your '
                             'GM expectations, if combined with --editable.'))
+  parser.add_argument('--gm-summaries-bucket',
+                    help=('Google Cloud Storage bucket to download '
+                          'JSON_FILENAME files from. '
+                          'Defaults to %(default)s ; if set to '
+                          'empty string, just compare to actual-results '
+                          'already found in ACTUALS_DIR.'),
+                    default=DEFAULT_GM_SUMMARIES_BUCKET)
+  parser.add_argument('--json-filename',
+                    help=('JSON summary filename to read for each builder; '
+                          'defaults to %(default)s.'),
+                    default=DEFAULT_JSON_FILENAME)
   parser.add_argument('--port', type=int,
                       help=('Which TCP port to listen on for HTTP requests; '
                             'defaults to %(default)s'),
@@ -672,8 +682,8 @@
 
   global _SERVER
   _SERVER = Server(actuals_dir=args.actuals_dir,
-                   actuals_repo_revision=args.actuals_revision,
-                   actuals_repo_url=args.actuals_repo,
+                   json_filename=args.json_filename,
+                   gm_summaries_bucket=args.gm_summaries_bucket,
                    port=args.port, export=args.export, editable=args.editable,
                    reload_seconds=args.reload, config_pairs=config_pairs,
                    builder_regex_list=args.builders)
diff --git a/tools/pyutils/__init__.py b/tools/pyutils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/pyutils/__init__.py
diff --git a/tools/pyutils/gs_utils.py b/tools/pyutils/gs_utils.py
new file mode 100755
index 0000000..745276e
--- /dev/null
+++ b/tools/pyutils/gs_utils.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Utilities for accessing Google Cloud Storage.
+
+TODO(epoger): move this into tools/utils for broader use?
+"""
+
+# System-level imports
+import os
+import posixpath
+import sys
+
+# Imports from third-party code
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+APICLIENT_DIRECTORY = os.path.join(
+    TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client')
+if APICLIENT_DIRECTORY not in sys.path:
+  sys.path.append(APICLIENT_DIRECTORY)
+from googleapiclient.discovery import build as build_service
+
+# Local imports
+import url_utils
+
+
+def download_file(source_bucket, source_path, dest_path,
+                  create_subdirs_if_needed=False):
+  """ Downloads a single file from Google Cloud Storage to local disk.
+
+  Args:
+    source_bucket: GCS bucket to download the file from
+    source_path: full path (Posix-style) within that bucket
+    dest_path: full path (local-OS-style) on local disk to copy the file to
+    create_subdirs_if_needed: boolean; whether to create subdirectories as
+        needed to create dest_path
+  """
+  source_http_url = posixpath.join(
+      'http://storage.googleapis.com', source_bucket, source_path)
+  url_utils.copy_contents(source_url=source_http_url, dest_path=dest_path,
+                          create_subdirs_if_needed=create_subdirs_if_needed)
+
+
+def list_bucket_contents(bucket, subdir=None):
+  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
+
+  Uses the API documented at
+  https://developers.google.com/storage/docs/json_api/v1/objects/list
+
+  Args:
+    bucket: name of the Google Storage bucket
+    subdir: directory within the bucket to list, or None for root directory
+  """
+  # The GCS command relies on the subdir name (if any) ending with a slash.
+  if subdir and not subdir.endswith('/'):
+    subdir += '/'
+  subdir_length = len(subdir) if subdir else 0
+
+  storage = build_service('storage', 'v1')
+  command = storage.objects().list(
+      bucket=bucket, delimiter='/', fields='items(name),prefixes',
+      prefix=subdir)
+  results = command.execute()
+
+  # The GCS command returned two subdicts:
+  # prefixes: the full path of every directory within subdir, with trailing '/'
+  # items: property dict for each file object within subdir
+  #        (including 'name', which is full path of the object)
+  dirs = []
+  for dir_fullpath in results.get('prefixes', []):
+    dir_basename = dir_fullpath[subdir_length:]
+    dirs.append(dir_basename[:-1])  # strip trailing slash
+  files = []
+  for file_properties in results.get('items', []):
+    file_fullpath = file_properties['name']
+    file_basename = file_fullpath[subdir_length:]
+    files.append(file_basename)
+  return (dirs, files)
diff --git a/tools/pyutils/url_utils.py b/tools/pyutils/url_utils.py
new file mode 100755
index 0000000..b107f56
--- /dev/null
+++ b/tools/pyutils/url_utils.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Utilities for working with URLs.
+
+TODO(epoger): move this into tools/utils for broader use?
+"""
+
+# System-level imports
+import contextlib
+import os
+import shutil
+import urllib
+import urlparse
+
+
+def create_filepath_url(filepath):
+  """ Returns a file:/// URL pointing at the given filepath on local disk.
+
+  Args:
+    filepath: string; path to a file on local disk (may be absolute or relative,
+        and the file does not need to exist)
+
+  Returns:
+    A file:/// URL pointing at the file.  Regardless of whether filepath was
+        specified as a relative or absolute path, the URL will contain an
+        absolute path to the file.
+
+  Raises:
+    An Exception, if filepath is already a URL.
+  """
+  if urlparse.urlparse(filepath).scheme:
+    raise Exception('"%s" is already a URL' % filepath)
+  return urlparse.urljoin(
+      'file:', urllib.pathname2url(os.path.abspath(filepath)))
+
+
+def copy_contents(source_url, dest_path, create_subdirs_if_needed=False):
+  """ Copies the full contents of the URL 'source_url' into
+  filepath 'dest_path'.
+
+  Args:
+    source_url: string; complete URL to read from
+    dest_path: string; complete filepath to write to (may be absolute or
+        relative)
+    create_subdirs_if_needed: boolean; whether to create subdirectories as
+        needed to create dest_path
+
+  Raises:
+    Some subclass of Exception if unable to read source_url or write dest_path.
+  """
+  if create_subdirs_if_needed:
+    dest_dir = os.path.dirname(dest_path)
+    if not os.path.exists(dest_dir):
+      os.makedirs(dest_dir)
+  with contextlib.closing(urllib.urlopen(source_url)) as source_handle:
+    with open(dest_path, 'wb') as dest_handle:
+      shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle)
diff --git a/tools/pyutils/url_utils_test.py b/tools/pyutils/url_utils_test.py
new file mode 100755
index 0000000..ef3d8c8
--- /dev/null
+++ b/tools/pyutils/url_utils_test.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Test url_utils.py
+"""
+
+# System-level imports
+import os
+import shutil
+import tempfile
+import unittest
+import urllib
+
+# Imports from within Skia
+import url_utils
+
+
+class UrlUtilsTest(unittest.TestCase):
+
+  def test_create_filepath_url(self):
+    """Tests create_filepath_url(). """
+    with self.assertRaises(Exception):
+      url_utils.create_filepath_url('http://1.2.3.4/path')
+    # Pass absolute filepath.
+    self.assertEquals(
+        url_utils.create_filepath_url(
+            '%sdir%sfile' % (os.path.sep, os.path.sep)),
+        'file:///dir/file')
+    # Pass relative filepath.
+    self.assertEquals(
+        url_utils.create_filepath_url(os.path.join('dir', 'file')),
+        'file://%s/dir/file' % urllib.pathname2url(os.getcwd()))
+
+  def test_copy_contents(self):
+    """Tests copy_contents(). """
+    contents = 'these are the contents'
+    tempdir_path = tempfile.mkdtemp()
+    try:
+      source_path = os.path.join(tempdir_path, 'source')
+      source_url = url_utils.create_filepath_url(source_path)
+      with open(source_path, 'w') as source_handle:
+        source_handle.write(contents)
+      dest_path = os.path.join(tempdir_path, 'new_subdir', 'dest')
+      # Destination subdir does not exist, so copy_contents() should fail
+      # if create_subdirs_if_needed is False.
+      with self.assertRaises(Exception):
+        url_utils.copy_contents(source_url=source_url,
+                                dest_path=dest_path,
+                                create_subdirs_if_needed=False)
+      # If create_subdirs_if_needed is True, it should work.
+      url_utils.copy_contents(source_url=source_url,
+                              dest_path=dest_path,
+                              create_subdirs_if_needed=True)
+      self.assertEquals(open(dest_path).read(), contents)
+    finally:
+      shutil.rmtree(tempdir_path)
diff --git a/tools/test_all.py b/tools/test_all.py
new file mode 100755
index 0000000..6467a21
--- /dev/null
+++ b/tools/test_all.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Run all unittests within this directory tree, recursing into subdirectories.
+"""
+
+import os
+import unittest
+
+from tests import skimage_self_test
+
+
+def main():
+  # First, run any tests that cannot be automatically discovered (because
+  # they don't use Python's unittest framework).
+  skimage_self_test.main()
+
+  # Now discover/run all tests that use Python's unittest framework.
+  suite = unittest.TestLoader().discover(os.path.dirname(__file__),
+                                         pattern='*_test.py')
+  results = unittest.TextTestRunner(verbosity=2).run(suite)
+  print repr(results)
+  if not results.wasSuccessful():
+    raise Exception('failed one or more unittests')
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/tests/__init__.py b/tools/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tools/tests/__init__.py
diff --git a/tools/tests/run.sh b/tools/tests/run.sh
index 6fca3cb..fd12711 100755
--- a/tools/tests/run.sh
+++ b/tools/tests/run.sh
@@ -201,7 +201,7 @@
 # ('make tools/tests/run.sh work cross-platform')
 #
 
-COMMAND="python tools/tests/run_all.py"
+COMMAND="python tools/test_all.py"
 echo "$COMMAND"
 $COMMAND
 ret=$?
diff --git a/tools/tests/run_all.py b/tools/tests/run_all.py
deleted file mode 100755
index 84886f7..0000000
--- a/tools/tests/run_all.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/python
-
-"""
-Copyright 2013 Google Inc.
-
-Use of this source code is governed by a BSD-style license that can be
-found in the LICENSE file.
-
-Run all self-tests that were written in Python, raising an exception if any
-of them fail.
-"""
-
-import render_pictures_test
-import skimage_self_test
-
-def main():
-  """Run all self-tests, raising an exception if any of them fail."""
-  render_pictures_test.main()
-  skimage_self_test.main()
-
-if __name__ == '__main__':
-  main()