Autotest: Find long-failing tests.

Currently looks in the (unfiltered) database and finds all tests that
have not passed for the past 60 days.

This is a basic implementation so that further commits can be smaller in size.

BUG=chromium:247047
DEPLOY=none
TEST=There exists a set of unittests that test the core send-email-or-not logic.

Change-Id: I6893330cc595e308c5875408989dd40a9273f9ce
Reviewed-on: https://gerrit.chromium.org/gerrit/57237
Commit-Queue: Keyar Hood <keyar@chromium.org>
Reviewed-by: Keyar Hood <keyar@chromium.org>
Tested-by: Keyar Hood <keyar@chromium.org>
diff --git a/frontend/health/__init__.py b/frontend/health/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/frontend/health/__init__.py
diff --git a/frontend/health/common.py b/frontend/health/common.py
new file mode 100644
index 0000000..a9ca715
--- /dev/null
+++ b/frontend/health/common.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Set up autotest_lib convenience imports."""
+
+import os, sys
+dirname = os.path.dirname(sys.modules[__name__].__file__)
+autotest_dir = os.path.abspath(os.path.join(dirname, os.pardir, os.pardir))
+client_dir = os.path.join(autotest_dir, 'client')
+sys.path.insert(0, client_dir)  # temporary: makes setup_modules importable
+import setup_modules
+sys.path.pop(0)  # drop the temporary entry again
+setup_modules.setup(base_path=autotest_dir, root_module_name='autotest_lib')
diff --git a/frontend/health/complete_failures.py b/frontend/health/complete_failures.py
new file mode 100644
index 0000000..0cf3fc7
--- /dev/null
+++ b/frontend/health/complete_failures.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+import datetime, logging, shelve, sys
+
+import common
+from autotest_lib.client.common_lib import global_config, mail
+from autotest_lib.database import database_connection
+
+
+_GLOBAL_CONF = global_config.global_config
+_CONF_SECTION = 'AUTOTEST_WEB'  # global config section read below
+
+_MYSQL_READONLY_LOGIN_CREDENTIALS = {
+    'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
+    'username': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
+    'password': _GLOBAL_CONF.get_config_value(
+            _CONF_SECTION, 'readonly_password'),
+    'db_name': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
+}
+
+_STORAGE_FILE = 'failure_storage'  # filename handed to shelve.open()
+_DAYS_TO_BE_FAILING_TOO_LONG = 60  # days without a pass before we send mail
+_TEST_PASS_STATUS_INDEX = 6  # tko_tests.status value that counts as a pass
+_MAIL_RESULTS_FROM = 'chromeos-test-health@google.com'
+_MAIL_RESULTS_TO = 'chromeos-lab-infrastructure@google.com'
+
+
+def connect_to_db():
+    """
+    Create a readonly connection to the Autotest database.
+
+    @return the connected DatabaseConnection, logged in with the readonly user.
+
+    """
+    db = database_connection.DatabaseConnection(_CONF_SECTION)
+    db.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
+    return db
+
+
+def load_storage():
+    """
+    Loads the storage object from disk.
+
+    This object keeps track of which tests we have already sent mail about so
+    we only send emails when the status of a test changes.
+
+    @return the shelve object opened from _STORAGE_FILE.
+
+    """
+    return shelve.open(_STORAGE_FILE)  # relative name, resolved against cwd
+
+
+def save_storage(storage):
+    """
+    Saves the storage object to disk by closing it.
+
+    @param storage: The storage object (as returned by load_storage()).
+
+    """
+    storage.close()
+
+
+def get_last_pass_times(db):
+    """
+    Get all the tests that have passed and when their last passing run began.
+
+    @param db: The Autotest database connection.
+    @return the dict of test_name:latest_started_time pairs for tests that
+            have at least one row with the passing status.
+
+    """
+    query = ('SELECT test, MAX(started_time) FROM tko_tests '
+             'WHERE status = %s GROUP BY test' % _TEST_PASS_STATUS_INDEX)
+
+    passed_tests = {result[0]: result[1] for result in db.execute(query)}
+
+    return passed_tests
+
+
+def get_all_test_names(db):
+    """
+    Get all the test names from the database.
+
+    @param db: The Autotest database connection.
+    @return a list of the distinct test names found in tko_tests.
+
+    """
+    query = 'SELECT DISTINCT test FROM tko_tests'
+    return [row[0] for row in db.execute(query)]
+
+
+def get_tests_to_analyze(db):
+    """
+    Get all the tests as well as the last time they have passed.
+
+    The minimum datetime is given as last pass time for tests that have never
+    passed.
+
+    @param db: The Autotest database connection.
+
+    @return the dict of test_name:last_pass_time pairs for every test.
+
+    """
+    last_passes = get_last_pass_times(db)
+    all_test_names = get_all_test_names(db)
+    failures_names = (set(all_test_names) - set(last_passes.keys()))
+    always_failed = {test: datetime.datetime.min for test in failures_names}
+    return dict(always_failed.items() + last_passes.items())
+
+
+def email_about_test_failure(tests, storage):
+    """
+    Send emails based on the last time tests have passed.
+
+    This involves updating the storage and sending an email if a test has
+    failed for a long time and we have not already sent an email about that
+    test. Tests that passed recently enough are removed from the storage.
+
+    @param tests: The test_name:time_of_last_pass pairs.
+    @param storage: The storage object (test_name:time_first_reported).
+
+    """
+    failing_time_cutoff = datetime.timedelta(_DAYS_TO_BE_FAILING_TOO_LONG)
+    update_status = []
+
+    today = datetime.datetime.today()
+    for test, last_pass in tests.iteritems():
+        if today - last_pass >= failing_time_cutoff:
+            if test not in storage:
+                update_status.append(test)
+                storage[test] = today  # remember that we reported this test
+        else:
+            try:
+                del storage[test]  # passing again: allow a future report
+            except KeyError:
+                pass
+
+    if update_status:
+        logging.info('Found %i new failing tests out of %i, sending email.',
+                     len(update_status),
+                     len(tests))
+        mail.send(_MAIL_RESULTS_FROM,
+                  [_MAIL_RESULTS_TO],
+                  [],
+                  'Long Failing Tests',
+                  'The following tests have been failing for '
+                  'at least %s days:\n\n' % (_DAYS_TO_BE_FAILING_TOO_LONG) +
+                  '\n'.join(sorted(update_status)))
+
+
+def main():
+    """
+    Run the check: load state, query the database and send any email.
+
+    Allows other python code to import and run this code. Always returns 0,
+    which is used as the exit status when this file is run as a script.
+
+    """
+    db = connect_to_db()
+    storage = load_storage()
+    tests = get_tests_to_analyze(db)
+    email_about_test_failure(tests, storage)
+    save_storage(storage)
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/frontend/health/complete_failures_unittest.py b/frontend/health/complete_failures_unittest.py
new file mode 100755
index 0000000..00f9641
--- /dev/null
+++ b/frontend/health/complete_failures_unittest.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import datetime, unittest
+
+import mox
+
+import common, complete_failures
+from autotest_lib.client.common_lib import mail
+
+
+class EmailAboutTestFailureTests(mox.MoxTestBase):
+    """
+    Test the core logic of the complete_failures.py script.
+
+    The core logic is to send emails only if we have not yet done so for a
+    given test before and to take actions if the test has been failing for
+    long enough.
+
+    """
+    def setUp(self):
+        super(EmailAboutTestFailureTests, self).setUp()
+
+        # We need to mock out the send function in all tests or else the
+        # emails will be sent out during tests.
+        self.mox.StubOutWithMock(mail, 'send')
+
+        self._orignal_too_late = complete_failures._DAYS_TO_BE_FAILING_TOO_LONG
+
+
+    def tearDown(self):
+        complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = self._orignal_too_late
+
+
+    def test_deal_with_new_failing_test(self):
+        """
+        Test adding a failing test to the storage.
+
+        We expect the email sending code to be called if it is added.
+
+        """
+        # We will want to keep all the datetime logic intact and so we need to
+        # keep a reference to the unmocked datetime.
+        self.datetime = datetime.datetime
+        self.mox.StubOutWithMock(datetime, 'datetime')
+        datetime.datetime.today().AndReturn(self.datetime(2012, 1, 1))
+        complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+
+        mail.send(
+                'chromeos-test-health@google.com',
+                ['chromeos-lab-infrastructure@google.com'],
+                [],
+                'Long Failing Tests',
+                'The following tests have been failing for at '
+                'least %i days:\n\ntest'
+                    % complete_failures._DAYS_TO_BE_FAILING_TOO_LONG)
+
+        storage = {}
+
+        # The ReplayAll is required or else a mox object sneaks its way into
+        # the storage object somehow.
+        self.mox.ReplayAll()
+        complete_failures.email_about_test_failure(
+                {'test': datetime.datetime.min}, storage)
+
+        self.assertEqual(storage['test'], self.datetime(2012, 1, 1))
+        self.mox.VerifyAll()
+
+
+    def test_remove_test_if_it_has_succeeded_recently_enough(self):
+        """Test that we remove a passing test from the storage object."""
+        storage = {'test': datetime.datetime(2012, 1, 1)}
+        complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+        today = datetime.datetime(2012, 4, 10)
+        safe_date = datetime.datetime(2012, 4, 9)
+
+        self.mox.StubOutWithMock(datetime, 'datetime')
+        datetime.datetime.today().AndReturn(today)
+
+        self.mox.ReplayAll()
+        complete_failures.email_about_test_failure({'test': safe_date}, storage)
+
+        self.assertTrue('test' not in storage)
+        self.mox.VerifyAll()
+
+
+    def test_no_crashing_on_test_that_has_never_failed_for_too_long(self):
+        """Test that we do not crash when a passing test is not in storage."""
+        storage = {}
+        complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+        today = datetime.datetime(2012,4,10)
+        safe_date = datetime.datetime(2012,4,9)
+
+        self.mox.StubOutWithMock(datetime, 'datetime')
+        datetime.datetime.today().AndReturn(today)
+
+        self.mox.ReplayAll()
+        complete_failures.email_about_test_failure({'test': safe_date}, storage)
+
+        self.assertTrue('test' not in storage)
+        self.mox.VerifyAll()
+
+
+    def test_do_not_send_email_if_test_already_in_storage(self):
+        """Test that we only send emails on newly problematic tests."""
+        storage = {'test': datetime.datetime(2012, 1, 1)}
+        self.datetime = datetime.datetime
+        self.mox.StubOutWithMock(datetime, 'datetime')
+        datetime.datetime.today().AndReturn(self.datetime(2012, 1, 1))
+
+        self.mox.ReplayAll()
+        complete_failures.email_about_test_failure(
+                {'test': datetime.datetime.min}, storage)
+
+        self.mox.VerifyAll()
+
+
+    def test_do_not_delete_if_still_failing(self):
+        """Test that an old failing test is not removed from storage."""
+        # We will want to keep all the datetime logic intact and so we need to
+        # keep a reference to the unmocked datetime.
+        self.datetime = datetime.datetime
+        today = datetime.datetime(2012, 1, 1)
+        self.mox.StubOutWithMock(datetime, 'datetime')
+        datetime.datetime.today().AndReturn(today)
+
+        storage = {'test': datetime.datetime.min}
+
+        # The ReplayAll is required or else a mox object sneaks its way into
+        # the storage object somehow.
+        self.mox.ReplayAll()
+        complete_failures.email_about_test_failure(
+                {'test': datetime.datetime.min}, storage)
+
+        self.assertTrue('test' in storage)
+        self.mox.VerifyAll()
+
+
+if __name__ == '__main__':
+    unittest.main()