Make EagleEye input/output directories customizable via command-line options.

Two EagleEye (chromeOS perf dashboard) scripts previously used hard-coded
locations for their input/output directories.  This CL makes those locations
customizable via command-line options to the scripts.

BUG=chromium-os:38150
TEST=Verified the scripts can read input files and write output files to
custom directories specified as command-line options to the scripts.

Change-Id: Icc535a61d2967e96fc2bf2af6f48be2cb44398d5
Reviewed-on: https://gerrit.chromium.org/gerrit/44324
Commit-Queue: Dennis Jeffrey <dennisjeffrey@chromium.org>
Reviewed-by: Dennis Jeffrey <dennisjeffrey@chromium.org>
Tested-by: Dennis Jeffrey <dennisjeffrey@chromium.org>
diff --git a/frontend/perf-dashboard/extract_perf.py b/frontend/perf-dashboard/extract_perf.py
index 6f69a1e..93b62ba 100644
--- a/frontend/perf-dashboard/extract_perf.py
+++ b/frontend/perf-dashboard/extract_perf.py
@@ -13,6 +13,7 @@
     python extract_perf.py -v
 
 Run with -h to see the full set of command-line options.
+
 """
 
 import datetime
@@ -50,16 +51,15 @@
     'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
 }
 
-# Paths to files.
 _ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 _CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
-_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
-_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
-_CURR_PID_FILE = os.path.join(_DEST_DATA_DIR, __file__ + '.curr_pid.txt')
+_COMPLETED_ID_FILE_NAME = 'job_id_complete.txt'
+_CURR_PID_FILE_NAME = __file__ + '.curr_pid.txt'
 
 # Values that can be configured through options.
 _NUM_DAYS_BACK = 7  # Ignore perf test runs in database that finished more than
                     # this many days ago.
+_DEST_DATA_DIR = _ROOT_DIR
 
 # Other values that can only be configured here in the code.
 _AFE = frontend.AFE()
@@ -78,6 +78,7 @@
     @param completed_ids: see query_database().
 
     @return A list of string job IDs from the database that should be processed.
+
     """
     query = ('SELECT DISTINCT afe_job_id '
              'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
@@ -95,7 +96,7 @@
     return job_ids
 
 
-def write_perf_info_to_disk(job_id, result_dict, test_dir):
+def write_perf_info_to_disk(job_id, result_dict, test_dir, output_dir):
     """Writes extracted perf data for the given job ID to disk.
 
     Also writes the job ID to disk to mark it as having been processed.  Note
@@ -105,6 +106,8 @@
     @param job_id: The string job ID.
     @param result_dict: A dictionary of associated perf info to write to disk.
     @param test_dir: The string directory name in which to write perf data.
+    @param output_dir: The output directory in which results are being written.
+
     """
     result_out = [job_id, result_dict['job_name'], result_dict['platform']]
     perf_items = []
@@ -116,11 +119,12 @@
     with open(file_name, 'a') as fp:
         fp.write(simplejson.dumps(result_out) + '\n')
 
-    with open(_COMPLETED_ID_FILE, 'a') as fp:
+    with open(os.path.join(output_dir, _COMPLETED_ID_FILE_NAME), 'a') as fp:
         fp.write(job_id + '\n')
 
 
-def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
+def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir,
+                            output_dir):
     """Extracts perf data for a given job, then writes to local text files.
 
     @param cursor: A MySQLdb.cursor object used for interacting with a database.
@@ -128,10 +132,12 @@
     @param unexpected_job_names: A set of job names encountered so far that are
         not associated with a known platform type.
     @param test_dir: The string directory name in which to write perf data.
+    @param output_dir: The output directory in which results are being written.
 
     @return True, if data for the specified job ID is written to disk, or
         False if not (will be False if the job ID is not associated with a known
         platform type).
+
     """
     query = ('SELECT job_name,iteration_key,iteration_value '
              'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
@@ -168,11 +174,12 @@
     if 'platform' not in result:
         return False
 
-    write_perf_info_to_disk(job_id, result, test_dir)
+    write_perf_info_to_disk(job_id, result, test_dir, output_dir)
     return True
 
 
-def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
+def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date,
+                   output_dir):
     """Queries database for perf values and stores them into local text files.
 
     This function performs the work only for the specified test case.
@@ -183,10 +190,12 @@
         extracted from the database.
     @param oldest_db_lookup_date: The oldest date (represented as a string) for
         which we want to consider perf values in the database.
+    @param output_dir: The output directory in which results are being written.
 
     @return The number of new job IDs that have been extracted/processed.
+
     """
-    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
+    test_dir = os.path.join(output_dir, test_name)
     if not os.path.isdir(test_dir):
         os.makedirs(test_dir)
 
@@ -201,7 +210,7 @@
         logging.debug('Processing job %d of %d', i + 1, len(job_ids))
 
         if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,
-                                   test_dir):
+                                   test_dir, output_dir):
             completed_ids.add(job_id)
             num_newly_added += 1
 
@@ -212,13 +221,15 @@
     return num_newly_added
 
 
-def extract_new_perf_data(cursor, options):
+def extract_new_perf_data(cursor, output_dir, options):
     """Extracts new perf data from database and writes data to local text files.
 
     @param cursor: A MySQLdb.cursor object used for interacting with a database.
+    @param output_dir: The output directory in which results are being written.
     @param options: An optparse.OptionParser options object.
 
     @return The number of new job IDs that have been extracted/processed.
+
     """
     charts = {}
     with open(_CHART_CONFIG_FILE, 'r') as fp:
@@ -241,8 +252,9 @@
 
     # Get list of already-completed job IDs so we don't re-fetch their data.
     completed_ids = set()
-    if os.path.isfile(_COMPLETED_ID_FILE):
-        with open(_COMPLETED_ID_FILE, 'r') as fp:
+    completed_id_file = os.path.join(output_dir, _COMPLETED_ID_FILE_NAME)
+    if os.path.isfile(completed_id_file):
+        with open(completed_id_file, 'r') as fp:
             job_ids = map(lambda x: x.strip(), fp.readlines())
             for job_id in job_ids:
                 completed_ids.add(job_id)
@@ -253,15 +265,20 @@
                      len(test_names), test_name)
 
         num_newly_added += query_database(cursor, test_name, completed_ids,
-                                          oldest_db_lookup_date)
+                                          oldest_db_lookup_date, output_dir)
 
     return num_newly_added
 
 
-def cleanup():
-    """Cleans up when this script is done."""
-    if os.path.isfile(_CURR_PID_FILE):
-        os.remove(_CURR_PID_FILE)
+def cleanup(output_dir):
+    """Cleans up when this script is done.
+
+    @param output_dir: The output directory in which results are being written.
+
+    """
+    curr_pid_file = os.path.join(output_dir, _CURR_PID_FILE_NAME)
+    if os.path.isfile(curr_pid_file):
+        os.remove(curr_pid_file)
 
 
 def main():
@@ -273,6 +290,12 @@
                            'computed within this many days ago (if this script '
                            'is invoked daily, no need to consider history from '
                            'many days back). Defaults to %default days back.')
+    parser.add_option('-o', '--output-dir', metavar='DIR', type='string',
+                      default=_DEST_DATA_DIR,
+                      help='Absolute path to the output directory in which to '
+                           'store the raw perf data extracted from the '
+                           'database. Will be written into a subfolder named '
+                           '"data". Defaults to "%default".')
     parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                       help='Connect to the chromeos-lab CloudSQL database, '
                            'rather than the original MySQL autotest database.')
@@ -310,12 +333,14 @@
 
     logging.debug('Database connection complete.')
 
-    if not os.path.isdir(_DEST_DATA_DIR):
-        os.makedirs(_DEST_DATA_DIR)
+    output_dir = os.path.join(options.output_dir, 'data')
+    if not os.path.isdir(output_dir):
+        os.makedirs(output_dir)
 
-    common.die_if_already_running(_CURR_PID_FILE, logging)
-    num_newly_added = extract_new_perf_data(cursor, options)
-    cleanup()
+    common.die_if_already_running(
+        os.path.join(output_dir, _CURR_PID_FILE_NAME), logging)
+    num_newly_added = extract_new_perf_data(cursor, output_dir, options)
+    cleanup(output_dir)
     logging.info('Done! Added info for %d new job IDs', num_newly_added)