Mike Frysinger | d03e6b5 | 2019-08-03 12:49:01 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2 |
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 2 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Kill slow queries in local autotest database.""" |
| 7 | |
| 8 | import logging |
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 9 | import optparse |
| 10 | import sys |
Shuqian Zhao | 0bb62f4 | 2017-06-21 16:28:23 -0700 | [diff] [blame] | 11 | import time |
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 12 | |
| 13 | import common |
| 14 | from autotest_lib.client.common_lib import global_config |
| 15 | from autotest_lib.site_utils import gmail_lib |
Shuqian Zhao | 0bb62f4 | 2017-06-21 16:28:23 -0700 | [diff] [blame] | 16 | from autotest_lib.client.common_lib import utils |
Paul Hobbs | b87be55 | 2017-07-31 19:10:49 -0700 | [diff] [blame] | 17 | from autotest_lib.site_utils.stats import mysql_stats |
Shuqian Zhao | 0bb62f4 | 2017-06-21 16:28:23 -0700 | [diff] [blame] | 18 | |
| 19 | try: |
| 20 | from chromite.lib import metrics |
| 21 | from chromite.lib import ts_mon_config |
| 22 | except ImportError: |
| 23 | metrics = utils.metrics_mock |
| 24 | ts_mon_config = utils.metrics_mock |
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 25 | |
# Path constant; nothing in the visible part of this file reads it.
AT_DIR = '/usr/local/autotest'

# Command line defaults are looked up in the global config; each one falls
# back to an empty string when the entry is missing.
_get_config = global_config.global_config.get_config_value
DEFAULT_USER = _get_config('CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = _get_config(
        'CROS', 'db_backup_password', type=str, default='')
DEFAULT_MAIL = _get_config('SCHEDULER', 'notify_email', type=str, default='')
| 33 | |
| 34 | |
def parse_options():
    """Parse the command line arguments.

    The defaults for user, password and mail come from the module level
    DEFAULT_USER, DEFAULT_PASSWD and DEFAULT_MAIL values.

    @returns: a tuple (parser, options, args): the OptionParser that was
              used, the parsed option values, and the list of leftover
              positional arguments.
    """
    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-u', '--user', default=DEFAULT_USER,
                      help='User to login to the Autotest DB. Default is the '
                           'one defined in config file.')
    parser.add_option('-p', '--password', default=DEFAULT_PASSWD,
                      help='Password to login to the Autotest DB. Default is '
                           'the one defined in config file.')
    parser.add_option('-t', '--timeout', type=int, default=300,
                      help='Timeout boundary of the slow database query. '
                           'Default is 300s')
    parser.add_option('-m', '--mail', default=DEFAULT_MAIL,
                      help='Mail address to send the summary to. Default is '
                           'ChromeOS infra Deputy')
    options, args = parser.parse_args()
    return parser, options, args
| 53 | |
| 54 | |
def verify_options_and_args(options, args):
    """Verify the validity of options and args.

    Verification fails when any positional argument is left over, or when
    either the user or the password option is empty. Each failure is logged
    as an error.

    @param options: The parsed options to verify; its `user` and `password`
                    attributes are read.
    @param args: The parsed args to verify.

    @returns: True if verification passes, False otherwise.
    """
    if args:
        # Hand the value to logging instead of concatenating, so the message
        # is only built when the record is actually emitted.
        logging.error('Unknown arguments: %s', args)
        return False

    if not (options.user and options.password):
        logging.error('Failed to get the default user or password for Autotest'
                      ' DB. Please specify them through the command line.')
        return False
    return True
| 72 | |
| 73 | |
def format_the_output(slow_queries):
    """Convert a list of slow queries into a readable string format.

    e.g. [(a, b, c...)] -->
         "Id: a
          User: b
          Host: c
          ...
         "
    Only the first eight fields of each row are rendered (Id, User, Host,
    db, Command, Time, State, Info); trailing extra fields are ignored, and
    a row may be any sliceable sequence, not just a tuple. Entries are
    separated by one blank line.

    @param slow_queries: A list of rows, one row contains all the info about
                         one single slow query.

    @returns: one clean string representation of all the slow queries, or an
              empty string when the list is empty.
    """
    template = ('Id: %s\nUser: %s\nHost: %s\ndb: %s\nCommand: %s\n'
                'Time: %s\nState: %s\nInfo: %s\n')
    # tuple(q[:8]) keeps the %-formatting valid when a row carries more than
    # eight columns or arrives as a list instead of a tuple.
    return '\n'.join(template % tuple(q[:8]) for q in slow_queries)
| 92 | |
| 93 | |
def kill_slow_queries(user, password, timeout):
    """Kill the database queries that have run for at least the timeout.

    Opens a connection to the database on localhost, reads the full
    processlist, and issues a KILL for every entry whose command is 'Query'
    and whose time is greater than or equal to the timeout.

    @param user: User to login to the Autotest DB.
    @param password: Password to login to the Autotest DB.
    @param timeout: Timeout limit to kill the slow queries.

    @returns: a tuple, first element is the string representation of all the
              slow queries that were found, second element is the number of
              KILL statements issued. ('', 0) when nothing was slow.
    """
    conn = mysql_stats.RetryingConnection('localhost', user, password)
    conn.Connect()

    # Snapshot what the server is currently running.
    conn.Execute('SHOW FULL PROCESSLIST')
    offenders = []
    for row in conn.Fetchall():
        # Field 4 is the command, field 5 the time the entry has been running.
        if row[4] == 'Query' and row[5] >= timeout:
            offenders.append(row)

    if not offenders:
        logging.info('No slow queries over %ds!', timeout)
        return ('', 0)

    report = format_the_output(offenders)
    logging.info('Start killing following slow queries\n%s', report)
    killed = 0
    for row in offenders:
        query_id = row[0]
        logging.info('Killing %s...', query_id)
        conn.Execute('KILL %d' % query_id)
        logging.info('Done!')
        killed += 1
    return (report, killed)
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 126 | |
| 127 | |
def main():
    """Main entry.

    Configures logging, validates the command line, then loops forever:
    kill the slow queries, mail a summary and bump the killed-queries
    counter when something was killed, and sleep for the timeout. Any
    exception raised by the loop is counted, logged and re-raised.

    @returns: 1 when the command line options fail verification.
    """
    # Reset the logging module so the basicConfig call below takes effect.
    logging.shutdown()
    reload(logging)
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')

    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            service_name='kill_slow_queries', indirect=True)
    with ts_mon_state:
        killed_count = 0
        parser, options, args = parse_options()
        if not verify_options_and_args(options, args):
            parser.print_help()
            return 1
        try:
            while True:
                killed_report, killed_count = kill_slow_queries(
                        options.user, options.password, options.timeout)
                if killed_report:
                    mail_body = 'Below are killed queries:\n%s' % killed_report
                    gmail_lib.send_email(
                            options.mail,
                            'Successfully killed slow autotest db queries',
                            mail_body)
                    killed_metric = metrics.Counter(
                            'chromeos/autotest/afe_db/killed_slow_queries')
                    killed_metric.increment_by(killed_count)
                # Wait one timeout period before scanning again.
                time.sleep(options.timeout)
        except Exception as e:
            failure_metric = metrics.Counter(
                    'chromeos/autotest/afe_db/failed_to_kill_query')
            failure_metric.increment()
            logging.error('Failed to kill slow db queries.\n%s', e)
            raise
Shuqian Zhao | bbf1daa | 2016-07-26 14:54:04 -0700 | [diff] [blame] | 160 | |
| 161 | |
# Script entry point: hand main()'s return value to the shell as exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
| 164 | |