Autotest: Find long-failing tests.
Currently looks in the (unfiltered) database and finds all tests that
have not passed for the past 60 days.
This is a basic implementation so that further commits can be smaller in size.
BUG=chromium:247047
DEPLOY=none
TEST=A set of unit tests covers the core logic that decides whether to send an email.
Change-Id: I6893330cc595e308c5875408989dd40a9273f9ce
Reviewed-on: https://gerrit.chromium.org/gerrit/57237
Commit-Queue: Keyar Hood <keyar@chromium.org>
Reviewed-by: Keyar Hood <keyar@chromium.org>
Tested-by: Keyar Hood <keyar@chromium.org>
diff --git a/frontend/health/__init__.py b/frontend/health/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/frontend/health/__init__.py
diff --git a/frontend/health/common.py b/frontend/health/common.py
new file mode 100644
index 0000000..a9ca715
--- /dev/null
+++ b/frontend/health/common.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Set up the autotest_lib convenience imports for frontend/health scripts."""
+
+import os, sys
+dirname = os.path.dirname(sys.modules[__name__].__file__)
+autotest_dir = os.path.abspath(os.path.join(dirname, os.pardir, os.pardir))
+client_dir = os.path.join(autotest_dir, 'client')
+sys.path.insert(0, client_dir)  # temporary, only to import setup_modules
+import setup_modules
+sys.path.pop(0)  # meant to remove the client_dir entry added above
+setup_modules.setup(base_path=autotest_dir, root_module_name='autotest_lib')
diff --git a/frontend/health/complete_failures.py b/frontend/health/complete_failures.py
new file mode 100644
index 0000000..0cf3fc7
--- /dev/null
+++ b/frontend/health/complete_failures.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+import datetime, logging, shelve, sys
+
+import common
+from autotest_lib.client.common_lib import global_config, mail
+from autotest_lib.database import database_connection
+
+
+_GLOBAL_CONF = global_config.global_config
+_CONF_SECTION = 'AUTOTEST_WEB'
+
+_MYSQL_READONLY_LOGIN_CREDENTIALS = {
+ 'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
+ 'username': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
+ 'password': _GLOBAL_CONF.get_config_value(
+ _CONF_SECTION, 'readonly_password'),
+ 'db_name': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
+}
+
+_STORAGE_FILE = 'failure_storage'  # Shelve file opened by load_storage().
+_DAYS_TO_BE_FAILING_TOO_LONG = 60  # Days without a pass before we email.
+_TEST_PASS_STATUS_INDEX = 6  # tko_tests.status value counted as a pass.
+_MAIL_RESULTS_FROM = 'chromeos-test-health@google.com'
+_MAIL_RESULTS_TO = 'chromeos-lab-infrastructure@google.com'
+
+
+def connect_to_db():
+ """
+ Create a readonly connection to the Autotest database.
+
+ @return a DatabaseConnection connected with the readonly credentials.
+
+ """
+ db = database_connection.DatabaseConnection(_CONF_SECTION)
+ db.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
+ return db
+
+
+def load_storage():
+ """
+ Opens the shelve file that backs the storage object.
+
+ This object keeps track of which tests we have already sent mail about so
+ we only send emails when the status of a test changes.
+
+ @return the open shelf; pass it to save_storage() when done.
+
+ """
+ return shelve.open(_STORAGE_FILE)
+
+
+def save_storage(storage):
+ """
+ Saves the storage object to disk by closing it.
+
+ @param storage: The storage object (as returned by load_storage) to close.
+
+ """
+ storage.close()
+
+
+def get_last_pass_times(db):
+ """
+ Get all the tests that have passed and the time they last passed.
+
+ @param db: The Autotest database connection.
+ @return the dict of test_name:latest_started_time pairs for tests that
+ have at least one passing row in tko_tests.
+
+ """
+ query = ('SELECT test, MAX(started_time) FROM tko_tests '
+ 'WHERE status = %s GROUP BY test' % _TEST_PASS_STATUS_INDEX)
+
+ passed_tests = {result[0]: result[1] for result in db.execute(query)}
+
+ return passed_tests
+
+
+def get_all_test_names(db):
+ """
+ Get all the distinct test names from the tko_tests table.
+
+ @param db: The Autotest database connection.
+ @return a list of all the test names, one entry per distinct name.
+
+ """
+ query = 'SELECT DISTINCT test FROM tko_tests'
+ return [row[0] for row in db.execute(query)]
+
+
+def get_tests_to_analyze(db):
+ """
+ Get all the tests as well as the last time they have passed.
+
+ The minimum datetime (datetime.datetime.min) is given as the last pass
+ time for tests that have never passed.
+
+ @param db: The Autotest database connection.
+
+ @return the dict of test_name:time_of_last_pass pairs.
+
+ """
+ last_passes = get_last_pass_times(db)
+ all_test_names = get_all_test_names(db)
+ failures_names = (set(all_test_names) - set(last_passes.keys()))
+ always_failed = {test: datetime.datetime.min for test in failures_names}
+ return dict(always_failed.items() + last_passes.items())
+
+
+def email_about_test_failure(tests, storage):
+    """
+    Send an email about tests that have newly been failing for too long.
+
+    A test is reported once, when it has not passed for at least
+    _DAYS_TO_BE_FAILING_TOO_LONG days and is not yet in storage. Tests that
+    have passed more recently than that are removed from storage.
+
+    @param tests: The test_name:time_of_last_pass pairs.
+    @param storage: The storage object; it is updated in place.
+
+    """
+    failing_time_cutoff = datetime.timedelta(_DAYS_TO_BE_FAILING_TOO_LONG)
+    update_status = []
+
+    today = datetime.datetime.today()
+    for test, last_pass in tests.iteritems():
+        if today - last_pass >= failing_time_cutoff:
+            if test not in storage:
+                update_status.append(test)
+                storage[test] = today
+        else:
+            try:
+                del storage[test]
+            except KeyError:
+                pass  # The test was never recorded as long-failing.
+
+    if update_status:
+        logging.info('Found %i new failing tests out of %i, sending email.',
+                     len(update_status),
+                     len(tests))
+        mail.send(_MAIL_RESULTS_FROM,
+                  [_MAIL_RESULTS_TO],
+                  [],
+                  'Long Failing Tests',
+                  'The following tests have been failing for '
+                  'at least %s days:\n\n' % (_DAYS_TO_BE_FAILING_TOO_LONG) +
+                  '\n'.join(sorted(update_status)))  # stable ordering
+
+
+def main():
+ """
+ Find long-failing tests and email about the newly failing ones.
+
+ Kept as a function so that other python code can import and run it.
+ @return 0 always; the __main__ guard passes it to sys.exit().
+
+ """
+ db = connect_to_db()
+ storage = load_storage()
+ tests = get_tests_to_analyze(db)
+ email_about_test_failure(tests, storage)
+ save_storage(storage)
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/frontend/health/complete_failures_unittest.py b/frontend/health/complete_failures_unittest.py
new file mode 100755
index 0000000..00f9641
--- /dev/null
+++ b/frontend/health/complete_failures_unittest.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import datetime, unittest
+
+import mox
+
+import common, complete_failures
+from autotest_lib.client.common_lib import mail
+
+
+class EmailAboutTestFailureTests(mox.MoxTestBase):
+ """
+ Test the core logic of the complete_failures.py script.
+
+ The core logic is to send emails only if we have not yet done so for a
+ given test before and to take actions if the test has been failing for
+ long enough.
+
+ """
+ def setUp(self):
+ super(EmailAboutTestFailureTests, self).setUp()
+
+ # We need to mock out the send function in all tests or else the
+ # emails will be sent out during tests.
+ self.mox.StubOutWithMock(mail, 'send')
+ # Remember the real threshold so that tearDown can restore it.
+ self._orignal_too_late = complete_failures._DAYS_TO_BE_FAILING_TOO_LONG
+
+
+ def tearDown(self):
+ complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = self._orignal_too_late
+
+
+ def test_deal_with_new_failing_test(self):
+ """
+ Test adding a failing test to the storage.
+
+ We expect the email sending code to be called if it is added.
+
+ """
+ # We will want to keep all the datetime logic intact and so we need to
+ # keep a reference to the unmocked datetime.
+ self.datetime = datetime.datetime
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.today().AndReturn(self.datetime(2012, 1, 1))
+ complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+
+ mail.send(
+ 'chromeos-test-health@google.com',
+ ['chromeos-lab-infrastructure@google.com'],
+ [],
+ 'Long Failing Tests',
+ 'The following tests have been failing for at '
+ 'least %i days:\n\ntest'
+ % complete_failures._DAYS_TO_BE_FAILING_TOO_LONG)
+
+ storage = {}
+
+ # The ReplayAll is required or else a mox object sneaks its way into
+ # the storage object somehow.
+ self.mox.ReplayAll()
+ complete_failures.email_about_test_failure(
+ {'test': datetime.datetime.min}, storage)
+
+ self.assertEqual(storage['test'], self.datetime(2012, 1, 1))
+ self.mox.VerifyAll()
+
+
+ def test_remove_test_if_it_has_succeeded_recently_enough(self):
+ """Test that we remove a passing test from the storage object."""
+ storage = {'test': datetime.datetime(2012, 1, 1)}
+ complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+ today = datetime.datetime(2012, 4, 10)
+ safe_date = datetime.datetime(2012, 4, 9)
+
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.today().AndReturn(today)
+
+ self.mox.ReplayAll()
+ complete_failures.email_about_test_failure({'test': safe_date}, storage)
+
+ self.assertTrue('test' not in storage)
+ self.mox.VerifyAll()
+
+
+ def test_no_crashing_on_test_that_has_never_failed_for_too_long(self):
+ """Test that a recently passing test not in storage does not crash."""
+ storage = {}
+ complete_failures._DAYS_TO_BE_FAILING_TOO_LONG = 60
+ today = datetime.datetime(2012,4,10)
+ safe_date = datetime.datetime(2012,4,9)
+
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.today().AndReturn(today)
+
+ self.mox.ReplayAll()
+ complete_failures.email_about_test_failure({'test': safe_date}, storage)
+
+ self.assertTrue('test' not in storage)
+ self.mox.VerifyAll()
+
+
+ def test_do_not_send_email_if_test_already_in_storage(self):
+ """Test that we only send emails for newly problematic tests."""
+ storage = {'test': datetime.datetime(2012, 1, 1)}
+ self.datetime = datetime.datetime
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.today().AndReturn(self.datetime(2012, 1, 1))
+ # No mail.send call is recorded, so any email attempt fails the test.
+ self.mox.ReplayAll()
+ complete_failures.email_about_test_failure(
+ {'test': datetime.datetime.min}, storage)
+
+ self.mox.VerifyAll()
+
+
+ def test_do_not_delete_if_still_failing(self):
+ """Test that an old failing test is not removed from storage."""
+ # We will want to keep all the datetime logic intact and so we need to
+ # keep a reference to the unmocked datetime.
+ self.datetime = datetime.datetime
+ today = datetime.datetime(2012, 1, 1)
+ self.mox.StubOutWithMock(datetime, 'datetime')
+ datetime.datetime.today().AndReturn(today)
+
+ storage = {'test': datetime.datetime.min}
+
+ # The ReplayAll is required or else a mox object sneaks its way into
+ # the storage object somehow.
+ self.mox.ReplayAll()
+ complete_failures.email_about_test_failure(
+ {'test': datetime.datetime.min}, storage)
+
+ self.assertTrue('test' in storage)
+ self.mox.VerifyAll()
+
+
+if __name__ == '__main__':  # Run the suite when executed directly.
+ unittest.main()