blob: face8aa28c5c76d9161bbff35487d9dadb8b7de6 [file] [log] [blame]
#!/usr/bin/python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5
"""Utility to check the replication delay of the slave databases.

The utility checks the value of Seconds_Behind_Master of slave databases,
including:
  - Slave databases of the AFE database, retrieved from the server database.
  - Readonly replicas of the TKO database, passed in by option --replicas.
"""
13
14import argparse
15import logging
16import os
17import re
18
19import common
20from autotest_lib.client.bin import utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import logging_config
24from autotest_lib.frontend import setup_django_environment
25from autotest_lib.site_utils import server_manager_utils
26
27from chromite.lib import metrics
28
29
# Shared autotest global configuration object; main() reads the AUTOTEST_WEB
# database credentials from it.
CONFIG = global_config.global_config
31
# MySQL client command that prints the replication status of a slave. The
# trailing \G asks the mysql client to print the result vertically (one field
# per line). Raw strings keep the backslashes literal without relying on
# Python passing unknown escape sequences through unchanged.
SLAVE_STATUS_CMD = r'show slave status\G'
# Captures the integer value of the Seconds_Behind_Master field. A NULL value
# does not match, which check_delay treats as a replica error.
DELAY_TIME_REGEX = r'Seconds_Behind_Master:\s(\d+)'
# Name of the metric the measured delay is reported to.
DELAY_METRICS = 'chromeos/autotest/afe_db/seconds_behind_master'
# A large delay to report to metrics indicating the replica is in error.
LARGE_DELAY = 1000000
38
def check_delay(server, user, password):
    """Check the delay of a given slave database server.

    Runs SLAVE_STATUS_CMD on the server and reports the parsed
    Seconds_Behind_Master value to the DELAY_METRICS distribution, tagged with
    the server name. If no integer value can be parsed from the output (e.g.
    the value is NULL), LARGE_DELAY is reported instead. If the SQL command
    itself fails, the failure is logged and no metric is reported.

    @param server: Hostname or IP address of the MySQL server.
    @param user: User name to log in the MySQL server.
    @param password: Password to log in the MySQL server.
    """
    # Only the SQL call is guarded, so that unrelated failures in parsing or
    # metric reporting are not mistaken for a command error.
    try:
        result = utils.run_sql_cmd(server, user, password, SLAVE_STATUS_CMD)
    except error.CmdError:
        logging.exception('Failed to get slave status of server %s.', server)
        return

    search = re.search(DELAY_TIME_REGEX, result, re.MULTILINE)
    if search:
        delay = int(search.group(1))
        metrics.SecondsDistribution(DELAY_METRICS).add(
                delay, fields={'server': server})
        logging.debug('Seconds_Behind_Master of server %s is %d.', server,
                      delay)
    else:
        # The value of Seconds_Behind_Master could be NULL, report a large
        # number to indicate database error.
        metrics.SecondsDistribution(DELAY_METRICS).add(
                LARGE_DELAY, fields={'server': server})
        logging.error('Failed to get Seconds_Behind_Master of server %s '
                      'from slave status:\n %s', server, result)
64
65
def parse_options(argv=None):
    """Parse command line inputs.

    @param argv: Optional list of argument strings to parse. When None (the
            default), argparse takes the arguments from sys.argv, so existing
            callers that pass nothing keep their behavior.

    @return: Options to run the script: `replicas` is a list of replica
            addresses (empty when --replicas is not given) and `logfile` is
            the log file path or None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--replicas', nargs='+',
                        default=[],
                        help='IP addresses of readonly replicas of TKO.')
    parser.add_argument('-l', '--logfile', type=str,
                        default=None,
                        help='Path to the log file to save logs.')
    return parser.parse_args(argv)
79
80
def main():
    """Main script.

    Parses the command line, optionally attaches a DEBUG-level file handler
    for --logfile, then checks the replication delay of:
      - each replica passed in by --replicas, using the global database
        credentials (which fall back to the AUTOTEST_WEB user/password when
        not configured), and
      - each server with role `database_slave` and status `primary` returned
        by server_manager_utils, using the AUTOTEST_WEB user/password.
    """
    options = parse_options()
    log_config = logging_config.LoggingConfig()
    if options.logfile:
        log_config.add_file_handler(
                file_path=os.path.abspath(options.logfile),
                level=logging.DEBUG)

    db_user = CONFIG.get_config_value('AUTOTEST_WEB', 'user')
    db_password = CONFIG.get_config_value('AUTOTEST_WEB', 'password')

    # Credentials for the replicas given on the command line; default to the
    # AFE database credentials when no global ones are configured.
    global_db_user = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_user', default=db_user)
    global_db_password = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'global_db_password', default=db_password)

    logging.info('Start checking Seconds_Behind_Master of slave databases')

    if not options.replicas:
        logging.warning('No replicas checked.')
    else:
        for replica in options.replicas:
            check_delay(replica, global_db_user, global_db_password)

    slaves = server_manager_utils.get_servers(
            role='database_slave', status='primary')
    for slave in slaves:
        check_delay(slave.hostname, db_user, db_password)

    logging.info('Finished checking.')
111
112
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()