[autotest] Add a new script that deletes unused labels.

Labels such as 'cros-version', 'fw-version', 'fwrw-version',
and 'fwro-version' are stacking up in the DB, and there are
many such labels that are no longer in use.
This script can be used to delete unused labels.
We will run this script as a cron job in the machines that
run MySQL. Such machines are shards and master DB.

BUG=chromium:380365
TEST=Test with local database.

Change-Id: I0f341c3e5a5977c53b0764efd0442684b525f3fd
Reviewed-on: https://chromium-review.googlesource.com/294164
Trybot-Ready: Mungyung Ryu <mkryu@google.com>
Tested-by: Mungyung Ryu <mkryu@google.com>
Reviewed-by: Dan Shi <dshi@chromium.org>
Commit-Queue: Mungyung Ryu <mkryu@google.com>
diff --git a/client/common_lib/logging_config.py b/client/common_lib/logging_config.py
index 17069dd..9130ddd 100644
--- a/client/common_lib/logging_config.py
+++ b/client/common_lib/logging_config.py
@@ -23,14 +23,16 @@
     stdout_level = logging.INFO
     stderr_level = logging.ERROR
 
+    FILE_FORMAT = ('%(asctime)s.%(msecs)03d %(levelname)-5.5s|%(module)18.18s:'
+                   '%(lineno)4.4d| %(message)s')
+
     file_formatter = logging.Formatter(
-        fmt='%(asctime)s.%(msecs)03d %(levelname)-5.5s|%(module)18.18s:'
-            '%(lineno)4.4d| %(message)s',
-        datefmt='%m/%d %H:%M:%S')
+        fmt=FILE_FORMAT, datefmt='%m/%d %H:%M:%S')
+
+    CONSOLE_FORMAT = '%(asctime)s %(levelname)-5.5s| %(message)s'
 
     console_formatter = logging.Formatter(
-        fmt='%(asctime)s %(levelname)-5.5s| %(message)s',
-        datefmt='%H:%M:%S')
+            fmt=CONSOLE_FORMAT, datefmt='%H:%M:%S')
 
     def __init__(self, use_console=True):
         self.logger = logging.getLogger()
@@ -54,29 +56,37 @@
         return os.path.join(cls.get_autotest_root(), 'logs')
 
 
-    def add_stream_handler(self, stream, level=logging.DEBUG):
+    def add_stream_handler(self, stream, level=logging.DEBUG, datefmt=None):
         handler = logging.StreamHandler(stream)
         handler.setLevel(level)
-        handler.setFormatter(self.console_formatter)
+        formatter = self.console_formatter
+        if datefmt:
+            formatter = logging.Formatter(fmt=self.CONSOLE_FORMAT,
+                                          datefmt=datefmt)
+        handler.setFormatter(formatter)
         self.logger.addHandler(handler)
         return handler
 
 
-    def add_console_handlers(self):
-        stdout_handler = self.add_stream_handler(sys.stdout,
-                                                 level=self.stdout_level)
+    def add_console_handlers(self, datefmt=None):
+        stdout_handler = self.add_stream_handler(
+                sys.stdout, level=self.stdout_level, datefmt=datefmt)
         # only pass records *below* STDERR_LEVEL to stdout, to avoid duplication
         stdout_handler.addFilter(AllowBelowSeverity(self.stderr_level))
 
-        self.add_stream_handler(sys.stderr, self.stderr_level)
+        self.add_stream_handler(sys.stderr, self.stderr_level, datefmt)
 
 
-    def add_file_handler(self, file_path, level=logging.DEBUG, log_dir=None):
+    def add_file_handler(self, file_path, level=logging.DEBUG, log_dir=None,
+                         datefmt=None):
         if log_dir:
             file_path = os.path.join(log_dir, file_path)
         handler = logging.FileHandler(file_path)
         handler.setLevel(level)
-        handler.setFormatter(self.file_formatter)
+        formatter = self.file_formatter
+        if datefmt:
+            formatter = logging.Formatter(fmt=self.FILE_FORMAT, datefmt=datefmt)
+        handler.setFormatter(formatter)
         self.logger.addHandler(handler)
         return handler
 
@@ -103,14 +113,14 @@
                 pass
 
 
-    def configure_logging(self, use_console=True, verbose=False):
+    def configure_logging(self, use_console=True, verbose=False, datefmt=None):
         self._clear_all_handlers() # see comment at top of file
         self.logger.setLevel(self.global_level)
 
         if verbose:
             self.stdout_level = logging.DEBUG
         if use_console:
-            self.add_console_handlers()
+            self.add_console_handlers(datefmt)
 
 
 class TestingConfig(LoggingConfig):
diff --git a/site_utils/label_cleaner.py b/site_utils/label_cleaner.py
new file mode 100755
index 0000000..921394f
--- /dev/null
+++ b/site_utils/label_cleaner.py
@@ -0,0 +1,166 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2015 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+"""Script to clean up labels that are not in use.
+
+Delete given labels from database when they are not in use.
+Labels that match the query `SELECT_USED_LABELS_FORMAT` are considered in use.
+When given labels are not in the used labels, those labels are deleted.
+
+For example, following command deletes all labels whose name begins with
+'cros-version' and are not in use.
+
+./label_cleaner.py -p cros-version
+
+If '-p' option is not given, we delete labels whose name is exactly
+'cros-version' and are not in use.
+"""
+
+
+import argparse
+import logging
+import MySQLdb
+import sys
+import traceback
+
+import common
+from autotest_lib.client.common_lib import global_config
+from autotest_lib.client.common_lib import logging_config
+
+
+DB_SERVER = global_config.global_config.get_config_value('AUTOTEST_WEB', 'host')
+USER = global_config.global_config.get_config_value('AUTOTEST_WEB', 'user')
+PASSWD = global_config.global_config.get_config_value(
+        'AUTOTEST_WEB', 'password')
+DATABASE = global_config.global_config.get_config_value(
+        'AUTOTEST_WEB', 'database')
+
+SELECT_USED_LABELS_FORMAT = """
+SELECT DISTINCT(label_id) FROM afe_autotests_dependency_labels UNION
+SELECT DISTINCT(label_id) FROM afe_hosts_labels UNION
+SELECT DISTINCT(label_id) FROM afe_jobs_dependency_labels UNION
+SELECT DISTINCT(label_id) FROM afe_shards_labels UNION
+SELECT DISTINCT(label_id) FROM afe_parameterized_jobs UNION
+SELECT DISTINCT(meta_host) FROM afe_host_queue_entries
+"""
+
+SELECT_LABELS_FORMAT = """
+SELECT id FROM afe_labels WHERE name %s
+"""
+
+DELETE_LABELS_FORMAT = """
+DELETE FROM afe_labels WHERE id in (%s)
+"""
+
+
+def get_used_labels(conn):
+    """Get labels that are currently in use.
+
+    @param conn: MySQLdb Connection object.
+
+    @return: A list of label ids.
+    """
+    cursor = conn.cursor()
+    sql = SELECT_USED_LABELS_FORMAT
+    try:
+        cursor.execute(sql)
+        rows = cursor.fetchall()
+    except:
+        logging.error("Query failed: %s", sql)
+        raise
+    return set(r[0] for r in rows)
+
+
+def fetch_labels(conn, label, prefix):
+    """Fetch labels from database.
+
+    @param conn: MySQLdb Connection object.
+    @param label: Label name to fetch.
+    @param prefix: If True, use `label` as a prefix. Otherwise, fetch
+                   labels whose name is exactly same as `label`.
+
+    @return: A list of label ids.
+    """
+    cursor = conn.cursor()
+    if prefix:
+        sql = SELECT_LABELS_FORMAT % ('LIKE "%s%%"' % label)
+    else:
+        sql = SELECT_LABELS_FORMAT % ('= "%s"' % label)
+    try:
+        cursor.execute(sql)
+        rows = cursor.fetchall()
+    except:
+        logging.error("Query failed: %s", sql)
+        raise
+    return set(r[0] for r in rows)
+
+
+def _delete_labels(conn, labels):
+    """Helper function of `delete_labels`."""
+    labels_str = ','.join([str(l) for l in labels])
+    logging.info("Deleting following labels: %s ..", labels_str)
+    sql = DELETE_LABELS_FORMAT % labels_str
+    try:
+        conn.cursor().execute(sql)
+        conn.commit()
+    except:
+        logging.error("Query failed: %s", sql)
+        raise
+    logging.info("Done.")
+
+
+def delete_labels(conn, labels, max_delete):
+    """Delete given labels from database.
+
+    @param conn: MySQLdb Connection object.
+    @param labels: Labels to delete. Set type.
+    @param max_delete: Max number of records to delete in a query.
+    """
+    if not labels:
+        logging.warn("No label to delete.")
+        return
+    while labels:
+        labels_to_del = set()
+        for i in xrange(min(len(labels), max_delete)):
+            labels_to_del.add(labels.pop())
+        _delete_labels(conn, labels_to_del)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--db', dest='db_server',
+                        help='Database server', default=DB_SERVER)
+    parser.add_argument('-p', dest='prefix', action='store_true',
+            help=('Use argument <label> as a prefix for matching. '
+                  'For example, when the argument <label> is "cros-version" '
+                  'and this option is enabled, then labels whose name '
+                  'beginning with "cros-version" are matched. When this '
+                  'option is disabled, we match labels whose name is '
+                  'exactly same as the argument <label>.'))
+    parser.add_argument('-n', dest='max_delete', type=int,
+           help=('Max number of records to delete in each query.'),
+           default=100)
+    parser.add_argument('label', help='Label name to delete')
+    options = parser.parse_args()
+
+    logging_config.LoggingConfig().configure_logging(
+            datefmt='%Y-%m-%d %H:%M:%S')
+
+    try:
+        conn = MySQLdb.connect(host=options.db_server, user=USER,
+                               passwd=PASSWD, db=DATABASE)
+        used_labels = get_used_labels(conn)
+        labels = fetch_labels(conn, options.label, options.prefix)
+        delete_labels(conn, labels - used_labels, options.max_delete)
+    except:
+        logging.error(traceback.format_exc())
+        return 1
+
+
+if __name__ == '__main__':
+    sys.exit(main())