Dan Shi | 95329e9 | 2017-02-02 11:12:15 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # Copyright 2017 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Utility to check the replication delay of the slave databases. |
| 7 | |
| 8 | The utility checks the value of Seconds_Behind_Master of slave databases, |
| 9 | including: |
| 10 | Slave databases of AFE database, retrieved from server database. |
| 11 | Readonly replicas of TKO database, passed in by option --replicas. |
| 12 | """ |
| 13 | |
| 14 | import argparse |
| 15 | import logging |
| 16 | import os |
| 17 | import re |
| 18 | |
| 19 | import common |
| 20 | from autotest_lib.client.bin import utils |
| 21 | from autotest_lib.client.common_lib import error |
| 22 | from autotest_lib.client.common_lib import global_config |
| 23 | from autotest_lib.client.common_lib import logging_config |
| 24 | from autotest_lib.frontend import setup_django_environment |
| 25 | from autotest_lib.site_utils import server_manager_utils |
| 26 | |
| 27 | from chromite.lib import metrics |
| 28 | |
| 29 | |
# Shared global configuration object; main() reads the AUTOTEST_WEB database
# credentials from it.
CONFIG = global_config.global_config
| 31 | |
# MySQL client command that prints the replication status of a slave database.
# The trailing \G is the mysql client terminator that displays the result
# vertically (one field per line). A raw string keeps the backslash literal
# without relying on Python's deprecated handling of unknown escapes.
SLAVE_STATUS_CMD = r'show slave status\G'
# Pattern that captures the integer value of Seconds_Behind_Master from the
# output of SLAVE_STATUS_CMD. It does not match when the value is NULL.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
# Name of the metric the replication delay is reported to.
DELAY_METRICS = 'chromeos/autotest/afe_db/seconds_behind_master'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000
| 38 | |
def check_delay(server, user, password):
    """Report the replication delay of a single slave database server.

    Runs SLAVE_STATUS_CMD against the server and extracts the value of
    Seconds_Behind_Master from the output. The value is added to the
    DELAY_METRICS distribution, tagged with the server name. When no integer
    value can be found in the output (e.g., the value is NULL), LARGE_DELAY
    is reported instead and the raw slave status is logged as an error.

    A CmdError raised while doing the above is logged and swallowed, so one
    failing server does not stop the caller from checking the others.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    try:
        slave_status = utils.run_sql_cmd(
                server, user, password, SLAVE_STATUS_CMD)
        match = re.search(DELAY_TIME_REGEX, slave_status, re.MULTILINE)

        if not match:
            # The value of Seconds_Behind_Master could be NULL, report a
            # large number to indicate database error.
            metrics.SecondsDistribution(DELAY_METRICS).add(
                    LARGE_DELAY, fields={'server': server})
            logging.error('Failed to get Seconds_Behind_Master of server %s '
                          'from slave status:\n %s', server, slave_status)
            return

        seconds_behind = int(match.group(1))
        metrics.SecondsDistribution(DELAY_METRICS).add(
                seconds_behind, fields={'server': server})
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      seconds_behind)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
| 64 | |
| 65 | |
def parse_options():
    """Build the command line parser and parse the script's arguments.

    Supported options:
      -r/--replicas: one or more addresses of readonly TKO replicas;
                     defaults to an empty list when the option is omitted.
      -l/--logfile:  path of a file to save logs to; defaults to None.

    @return: Parsed options (argparse namespace) to run the script.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
            '-r', '--replicas',
            nargs='+',
            default=[],
            help='IP addresses of readonly replicas of TKO.')
    arg_parser.add_argument(
            '-l', '--logfile',
            type=str,
            default=None,
            help='Path to the log file to save logs.')
    parsed = arg_parser.parse_args()
    return parsed
| 79 | |
| 80 | |
def main():
    """Check Seconds_Behind_Master of TKO replicas and AFE slave databases.

    Replicas passed in by --replicas are checked with the global_db_user /
    global_db_password credentials of the AUTOTEST_WEB config section, which
    fall back to that section's user / password values. Servers with role
    database_slave and status primary are then checked with the user /
    password credentials.
    """
    options = parse_options()

    log_config = logging_config.LoggingConfig()
    if options.logfile:
        log_path = os.path.abspath(options.logfile)
        log_config.add_file_handler(file_path=log_path, level=logging.DEBUG)

    # Credentials for the slave databases.
    afe_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
    afe_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')

    # Credentials for the replicas; default to the ones above when unset.
    replica_user = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_user', default=afe_user)
    replica_password = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_password', default=afe_password)

    logging.info('Start checking Seconds_Behind_Master of slave databases')

    if options.replicas:
        for replica in options.replicas:
            check_delay(replica, replica_user, replica_password)
    else:
        logging.warning('No replicas checked.')

    slave_servers = server_manager_utils.get_servers(
            role='database_slave', status='primary')
    for slave_server in slave_servers:
        check_delay(slave_server.hostname, afe_user, afe_password)

    logging.info('Finished checking.')
| 111 | |
| 112 | |
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()