AUTOTEST: Trim down database entries.

This script will cleanup the database by removing all job/test entries
prior to the provided date.

It first cleans up all tables that use the test/job id as a foriegn keys,
by doing a delete using join statements. Once those tables have been cleaned
up, it will delete the entries from tko_tests and tko_jobs.

BUG=chromium-os:34056
TEST=Worked successfully on atlantis3.mtv

Change-Id: I5645f449b9fdeebe537e2311b317984e0d969240
Reviewed-on: https://gerrit.chromium.org/gerrit/32050
Commit-Ready: Simran Basi <sbasi@google.com>
Reviewed-by: Simran Basi <sbasi@google.com>
Tested-by: Simran Basi <sbasi@google.com>
diff --git a/site_utils/db_cleanup_by_date.py b/site_utils/db_cleanup_by_date.py
new file mode 100644
index 0000000..e527a46
--- /dev/null
+++ b/site_utils/db_cleanup_by_date.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import datetime, re, sys, logging
+
+import common
+from autotest_lib.tko import db
+
+
+# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
+LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
+# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
+DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
+                     '|3[01])$')
+DELETE_CMD_FORMAT = ('DELETE FROM %s USING %s INNER JOIN %s WHERE %s.%s=%s.%s '
+                     'AND %s.%s <= "%s"')
+DELETE_WITH_INDIRECTION_FORMAT = ('DELETE FROM %s USING %s INNER JOIN %s INNER'
+                                  ' JOIN %s WHERE %s.%s=%s.%s AND %s.%s=%s.%s '
+                                  'AND %s.%s <= "%s"')
+
+AFE_JOB_ID = 'afe_job_id'
+JOB_ID = 'job_id'
+JOB_IDX = 'job_idx'
+TEST_IDX = 'test_idx'
+WHERE_BEFORE_CLAUSE_FORMAT = '%s <= "%s"'
+db = db.db(autocommit=False)
+
+
+def _delete_table_data_before_date(table_to_delete_from, related_table,
+                                   primary_key, date, foreign_key=None,
+                                   time_column="started_time",
+                                   indirection_table=None,
+                                   indirection_primary_key=None,
+                                   indirection_foreign_key=None):
+    """
+    We want a delete statement that will only delete from one table while
+    using a related table to find the rows to delete.
+
+    An example mysql command:
+    DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
+    tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
+    tko_tests.started_time <= '2012-02-01';
+
+    There are also tables that require 2 joins to determine which rows we want
+    to delete and we determine these rows by joining the table we want to
+    delete from with an indirection table to the actual jobs table.
+
+    @param table_to_delete_from: Table whose rows we want to delete.
+    @param related_table: Table with the date information we are selecting by.
+    @param foreign_key: Foreign key used in table_to_delete_from to reference
+                        the related table. If None, the primary_key is used.
+    @param primary_key: Primary key in the related table.
+    @param date: End date of the information we are trying to delete.
+    @param time_column: Column that we want to use to compare the date to.
+    @param indirection_table: Table we use to link the data we are trying to
+                              delete with the table with the date information.
+    @param indirection_primary_key: Key we use to connect the indirection table
+                                    to the table we are trying to delete rows
+                                    from.
+    @param indirection_foreign_key: Key we use to connect the indirection table
+                                    to the table with the date information.
+    """
+    if not foreign_key:
+        foreign_key = primary_key
+    if not related_table:
+        # Deleting from a table directly.
+        where = WHERE_BEFORE_CLAUSE_FORMAT % (time_column, date)
+        logging.debug('DELETE FROM %s WHERE %s', table_to_delete_from, where)
+        db.delete(table_to_delete_from, where)
+        return
+    if not indirection_table:
+        # Deleting using a single JOIN to get the date information.
+        sql = DELETE_CMD_FORMAT % (table_to_delete_from, table_to_delete_from,
+                                   related_table, table_to_delete_from,
+                                   foreign_key, related_table, primary_key,
+                                   related_table, time_column, date)
+    else:
+        # There are cases where we need to JOIN 3 TABLES to determine the rows
+        # we want to delete.
+        sql = DELETE_WITH_INDIRECTION_FORMAT % (table_to_delete_from,
+                table_to_delete_from, indirection_table, related_table,
+                table_to_delete_from, foreign_key, indirection_table,
+                indirection_primary_key, indirection_table,
+                indirection_foreign_key, related_table, primary_key,
+                related_table, time_column, date)
+    logging.debug('SQL: %s', sql)
+    db._exec_sql_with_commit(sql, [], None)
+
+
+def _subtract_days(date, days_to_subtract):
+    """
+    Return a date (string) that is 'days' before 'date'
+
+    @param date: date (string) we are subtracting from.
+    @param days_to_subtract: days (int) we are subtracting.
+    """
+    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
+    difference = date_obj - datetime.timedelta(days=days_to_subtract)
+    return difference.strftime('%Y-%m-%d')
+
+
+def _delete_all_data_before_date(date):
+    """
+    Delete all the database data before a given date.
+
+    This function focuses predominately on the data for jobs in tko_jobs.
+    However not all jobs in afe_jobs are also in tko_jobs.
+
+    Therefore we delete all the afe_job and foreign key relations prior to two
+    days before date. Then we do the queries using tko_jobs and these
+    tables to ensure all the related information is gone. Even though we are
+    repeating deletes on these tables, the second delete will be quick and
+    completely thorough in ensuring we clean up all the foreign key
+    dependencies correctly.
+
+    @param date: End date of the information we are trying to delete.
+    """
+    # First cleanup all afe_job related data (prior to 2 days before date).
+    # The reason for this is not all afe_jobs may be in tko_jobs.
+    afe_date = _subtract_days(date, 2)
+    logging.debug('Cleaning up all afe_job data prior to %s.', afe_date)
+    _delete_table_data_before_date('afe_aborted_host_queue_entries',
+                                   'afe_jobs', 'id', afe_date,
+                                   time_column= 'created_on',
+                                   foreign_key='queue_entry_id',
+                                   indirection_table='afe_host_queue_entries',
+                                   indirection_primary_key='id',
+                                   indirection_foreign_key='job_id')
+    _delete_table_data_before_date('afe_special_tasks', 'afe_jobs', 'id',
+                                   afe_date, time_column='created_on',
+                                   foreign_key='queue_entry_id',
+                                   indirection_table='afe_host_queue_entries',
+                                   indirection_primary_key='id',
+                                   indirection_foreign_key='job_id')
+    _delete_table_data_before_date('afe_host_queue_entries', 'afe_jobs',
+                                   'id', afe_date, time_column='created_on',
+                                   foreign_key=JOB_ID)
+    _delete_table_data_before_date('afe_job_keyvals', 'afe_jobs', 'id',
+                                   afe_date, time_column='created_on',
+                                   foreign_key=JOB_ID)
+    _delete_table_data_before_date('afe_jobs_dependency_labels', 'afe_jobs',
+                                   'id', afe_date, time_column='created_on',
+                                   foreign_key=JOB_ID)
+    _delete_table_data_before_date('afe_jobs', None, None, afe_date,
+                                   time_column='created_on')
+
+    # Now go through and clean up all the rows related to tko_jobs prior to
+    # date.
+    logging.debug('Cleaning up all data related to tko_jobs prior to %s.',
+                  date)
+    _delete_table_data_before_date('tko_test_attributes', 'tko_tests',
+                                   TEST_IDX, date)
+    _delete_table_data_before_date('tko_test_labels_tests', 'tko_tests',
+                                   TEST_IDX, date, foreign_key= 'test_id')
+    _delete_table_data_before_date('tko_iteration_attributes', 'tko_tests',
+                                   TEST_IDX, date)
+    _delete_table_data_before_date('tko_test_attributes', 'tko_tests',
+                                   TEST_IDX, date)
+    _delete_table_data_before_date('tko_job_keyvals', 'tko_jobs', JOB_IDX,
+                                   date, foreign_key='job_id')
+    _delete_table_data_before_date('afe_aborted_host_queue_entries',
+                                   'tko_jobs', AFE_JOB_ID, date,
+                                   foreign_key='queue_entry_id',
+                                   indirection_table='afe_host_queue_entries',
+                                   indirection_primary_key='id',
+                                   indirection_foreign_key='job_id')
+    _delete_table_data_before_date('afe_special_tasks', 'tko_jobs', AFE_JOB_ID,
+                                   date, foreign_key='queue_entry_id',
+                                   indirection_table='afe_host_queue_entries',
+                                   indirection_primary_key='id',
+                                   indirection_foreign_key='job_id')
+    _delete_table_data_before_date('afe_host_queue_entries', 'tko_jobs',
+                                   AFE_JOB_ID, date, foreign_key='job_id')
+    _delete_table_data_before_date('afe_job_keyvals', 'tko_jobs', AFE_JOB_ID,
+                                   date, foreign_key='job_id')
+    _delete_table_data_before_date('afe_jobs_dependency_labels', 'tko_jobs',
+                                   AFE_JOB_ID, date, foreign_key='job_id')
+    _delete_table_data_before_date('afe_jobs', 'tko_jobs', AFE_JOB_ID,
+                                   date, foreign_key='id')
+    _delete_table_data_before_date('tko_tests', 'tko_jobs', JOB_IDX, date)
+    _delete_table_data_before_date('tko_jobs', None, None, date)
+
+
+def main():
+    logging.basicConfig(level=logging.DEBUG, format=LOGGING_FORMAT)
+    if len (sys.argv) != 2:
+        print 'USAGE: python db_cleanup_by_date.py [DATE]'
+        return
+    date = sys.argv[1]
+    if not re.match(DATE_FORMAT_REGEX, date):
+        print 'DATE must be in yyyy-mm-dd format!'
+        return
+    try:
+        _delete_all_data_before_date(date)
+    except:
+        logging.debug('Deleting the data failed, rolling back changes')
+        db.rollback()
+        raise
+    logging.debug('Committing')
+    db.commit()
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/tko/db.py b/tko/db.py
index 72c897f..6a2eb13 100644
--- a/tko/db.py
+++ b/tko/db.py
@@ -135,6 +135,10 @@
         self.con.commit()
 
 
+    def rollback(self):
+        self.con.rollback()
+
+
     def get_last_autonumber_value(self):
         self.cur.execute('SELECT LAST_INSERT_ID()', [])
         return self.cur.fetchall()[0][0]