Improve metrics from label_cleaner.

label_cleaner's metrics were slightly wrong -- we were never reporting
all the existing labels, and misreporting the prefix-matched labels
under "all".

While there,
- add a dry-run option to test stuff
- add options to pass in database user/password etc from commandline.

BUG=chromium:753134
TEST=Run in dry-run mode.

Change-Id: Ieeca75af725b27e46277589a7a62afe35d63765b
Reviewed-on: https://chromium-review.googlesource.com/862196
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: Prathmesh Prabhu <pprabhu@chromium.org>
Reviewed-by: Prathmesh Prabhu <pprabhu@chromium.org>
diff --git a/site_utils/label_cleaner.py b/site_utils/label_cleaner.py
index 4ea06e3..2113fc1 100755
--- a/site_utils/label_cleaner.py
+++ b/site_utils/label_cleaner.py
@@ -23,8 +23,10 @@
 
 import argparse
 import logging
+import os
 import socket
 import sys
+import tempfile
 
 import common
 # Installed via build_externals, must be after import common.
@@ -49,6 +51,16 @@
 RESPECT_STATIC_LABELS = global_config.global_config.get_config_value(
         'SKYLAB', 'respect_static_labels', type=bool, default=False)
 
+# Per-prefix metrics are generated only for the following prefixes. This
+# whitelist is a second level defence against populating the 'label_prefix'
+# field with arbitrary values provided on the commandline.
+_LABEL_PREFIX_METRICS_WHITELIST = (
+        'cros-version',
+        'fwro-version',
+        'fwrw-version',
+        'pool',
+)
+
 SELECT_USED_LABELS_FORMAT = """
 SELECT DISTINCT(label_id) FROM afe_autotests_dependency_labels UNION
 SELECT DISTINCT(label_id) FROM afe_hosts_labels UNION
@@ -58,9 +70,6 @@
 SELECT DISTINCT(meta_host) FROM afe_host_queue_entries
 """
 
-SELECT_LABELS_FORMAT = """
-SELECT id FROM afe_labels WHERE name %s
-"""
 SELECT_REPLACED_LABELS = """
 SELECT label_id FROM afe_replaced_labels
 """
@@ -85,21 +94,24 @@
     return set(r[0] for r in rows)
 
 
-def fetch_labels(conn, label, prefix):
+def fetch_labels(conn, label=None, prefix=False):
     """Fetch labels from database.
 
     @param conn: MySQLdb Connection object.
-    @param label: Label name to fetch.
+    @param label: (optional) Label name to fetch.
     @param prefix: If True, use `label` as a prefix. Otherwise, fetch
                    labels whose name is exactly same as `label`.
 
     @return: A list of label ids.
     """
     cursor = conn.cursor()
-    if prefix:
-        sql = SELECT_LABELS_FORMAT % ('LIKE "%s%%"' % label)
+    if label is not None:
+        if prefix:
+            sql = 'SELECT id FROM afe_labels WHERE name LIKE "%s%%"' % label
+        else:
+            sql = 'SELECT id FROM afe_labels WHERE name = "%s"' % label
     else:
-        sql = SELECT_LABELS_FORMAT % ('= "%s"' % label)
+        sql = 'SELECT id FROM afe_labels'
     logging.debug('Running: %r', sql)
     cursor.execute(sql)
     rows = cursor.fetchall()
@@ -114,26 +126,30 @@
         return set(r[0] for r in rows) - replaced_label_ids
 
 
-def _delete_labels(conn, labels):
+def _delete_labels(conn, labels, dry_run):
     """Helper function of `delete_labels`."""
     labels_str = ','.join([str(l) for l in labels])
     sql = DELETE_LABELS_FORMAT % labels_str
-    logging.debug('Running: %r', sql)
-    conn.cursor().execute(sql)
-    conn.commit()
+    if dry_run:
+        logging.info('[DRY RUN] Would have run: %r', sql)
+    else:
+        logging.debug('Running: %r', sql)
+        conn.cursor().execute(sql)
+        conn.commit()
 
 
-def delete_labels(conn, labels, max_delete):
+def delete_labels(conn, labels, max_delete, dry_run=False):
     """Delete given labels from database.
 
     @param conn: MySQLdb Connection object.
     @param labels: iterable of labels to delete.
     @param max_delete: Max number of records to delete in a query.
+    @param dry_run: (Boolean) Whether this is a dry run.
     """
     while labels:
         chunk = labels[:max_delete]
         labels = labels[max_delete:]
-        _delete_labels(conn, chunk)
+        _delete_labels(conn, chunk, dry_run)
 
 
 def is_primary_server():
@@ -160,13 +176,34 @@
     if options.check_status and not is_primary_server():
         raise Exception('Cannot run in a non-primary server')
 
-    conn = MySQLdb.connect(host=options.db_server, user=USER, passwd=PASSWD,
-                           db=DATABASE)
+    conn = MySQLdb.connect(
+            host=options.db_server,
+            user=options.db_user,
+            passwd=options.db_password,
+            db=DATABASE,
+    )
+
+    all_labels = fetch_labels(conn)
+    logging.info('Found total %d labels', len(all_labels))
+    metrics.Gauge(_METRICS_PREFIX + '/total_labels_count').set(
+            len(all_labels),
+            fields={
+                    'target_db': options.db_server,
+                    'label_prefix': '',
+            },
+    )
 
     labels = fetch_labels(conn, options.label, options.prefix)
-    logging.info('Found total %d labels', len(labels))
-    metrics.Gauge(_METRICS_PREFIX + '/total_labels_count').set(
-            len(labels), fields={'target_db': options.db_server})
+    logging.info('Found total %d labels matching %s', len(labels),
+                 options.label)
+    if options.prefix and options.label in _LABEL_PREFIX_METRICS_WHITELIST:
+        metrics.Gauge(_METRICS_PREFIX + '/total_labels_count').set(
+                len(labels),
+                fields={
+                        'target_db': options.db_server,
+                        'label_prefix': options.label,
+                },
+        )
 
     used_labels = get_used_labels(conn)
     logging.info('Found %d labels are used', len(used_labels))
@@ -175,7 +212,7 @@
 
     to_delete = list(labels - used_labels)
     logging.info('Deleting %d unused labels', len(to_delete))
-    delete_labels(conn, to_delete, options.max_delete)
+    delete_labels(conn, to_delete, options.max_delete, options.dry_run)
     metrics.Counter(_METRICS_PREFIX + '/labels_deleted').increment_by(
             len(to_delete), fields={'target_db': options.db_server})
 
@@ -184,20 +221,54 @@
     """Cleans unused labels from AFE database"""
     parser = argparse.ArgumentParser(
             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument('--db', dest='db_server',
-                        help='Database server', default=DB_SERVER)
-    parser.add_argument('-p', dest='prefix', action='store_true',
+    parser.add_argument(
+            '--db',
+            dest='db_server',
+            help='Database server',
+            default=DB_SERVER,
+    )
+    parser.add_argument(
+            '--db-user',
+            dest='db_user',
+            help='Database user',
+            default=USER,
+    )
+    parser.add_argument(
+            '--db-password',
+            dest='db_password',
+            help='Database password',
+            default=PASSWD,
+    )
+    parser.add_argument(
+            '-p',
+            dest='prefix',
+            action='store_true',
             help=('Use argument <label> as a prefix for matching. '
                   'For example, when the argument <label> is "cros-version" '
                   'and this option is enabled, then labels whose name '
                   'beginning with "cros-version" are matched. When this '
                   'option is disabled, we match labels whose name is '
-                  'exactly same as the argument <label>.'))
-    parser.add_argument('-n', dest='max_delete', type=int,
-           help=('Max number of records to delete in each query.'),
-           default=100)
-    parser.add_argument('-s', dest='check_status', action='store_true',
-           help=('Enforce to run only in a server that has primary status'))
+                  'exactly same as the argument <label>.'),
+    )
+    parser.add_argument(
+            '-n',
+            dest='max_delete',
+            type=int,
+            help='Max number of records to delete in each query.',
+            default=100,
+    )
+    parser.add_argument(
+            '-s',
+            dest='check_status',
+            action='store_true',
+            help='Enforce to run only in a server that has primary status',
+    )
+    parser.add_argument(
+            '--dry-run',
+            dest='dry_run',
+            action='store_true',
+            help='Dry run mode. Do not actually delete any labels.',
+    )
     parser.add_argument('label', help='Label name to delete')
     options = parser.parse_args()
 
@@ -205,8 +276,20 @@
             datefmt='%Y-%m-%d %H:%M:%S',
             verbose=True)
 
-    with ts_mon_config.SetupTsMonGlobalState('afe_label_cleaner',
-                                             auto_flush=False):
+    if options.dry_run:
+        tfd, metrics_file=tempfile.mkstemp()
+        os.close(tfd)
+        ts_mon_context = ts_mon_config.SetupTsMonGlobalState(
+                'afe_label_cleaner',
+                auto_flush=False,
+                debug_file=metrics_file,
+        )
+    else:
+        ts_mon_context = ts_mon_config.SetupTsMonGlobalState(
+                'afe_label_cleaner',
+                auto_flush=False,
+        )
+    with ts_mon_context:
         try:
             clean_labels(options)
         except:
@@ -220,6 +303,8 @@
                             'success': True})
         finally:
             metrics.Flush()
+            if options.dry_run:
+                logging.info('Dumped ts_mon metrics to %s', metrics_file)
 
 
 if __name__ == '__main__':