Scott Zawalski | 20a9b58 | 2011-11-21 11:49:40 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
| 3 | # Copyright 2010 Google Inc. All Rights Reserved. |
| 4 | |
| 5 | """Tool to check the data consistency between master autotest db and replica. |
| 6 | |
| 7 | This tool will issue 'show master status;' and 'show slave status;' commands to |
| 8 | two replicated databases to compare their log positions. |
| 9 | |
| 10 | It will also take a delta command line argument to allow certain time delay |
| 11 | between master and slave. If the delta of two log positions falls into the |
| 12 | defined range, it will be treated as synced. |
| 13 | |
| 14 | It will send out an email notification upon any problem if a --to argument |
| 15 | is specified. |
| 16 | """ |
| 17 | |
| 18 | import getpass |
| 19 | import MySQLdb |
| 20 | import optparse |
| 21 | import os |
| 22 | import socket |
| 23 | import sys |
| 24 | |
| 25 | import common |
| 26 | from autotest_lib.client.common_lib import global_config |
| 27 | |
| 28 | |
# Connection settings for the master and replica databases, all read from the
# AUTOTEST_WEB section of the autotest global config.
_section = 'AUTOTEST_WEB'
c = global_config.global_config
DATABASE_HOST = c.get_config_value(_section, 'host')
REPLICA_DATABASE_HOST = c.get_config_value(_section, 'readonly_host')
DATABASE_NAME = c.get_config_value(_section, 'database')
DATABASE_USER = c.get_config_value(_section, 'user')
DATABASE_PASSWORD = c.get_config_value(_section, 'password')
# ShowStatus compares this against getpass.getuser() to decide whether the
# shared alerts address is added to the Cc list.
SYSTEM_USER = 'chromeos-test'
| 37 | |
| 38 | |
def ParseOptions():
    """Parse the command line flags of the replica checker.

    Returns:
        The optparse values object with the attributes `delta` (int,
        default 0), `to` and `cc` (strings, default ''), and `testmode`
        (bool, default False). Positional arguments are discarded.
    """
    # (flag names, add_option keyword arguments) for every supported option.
    option_specs = (
        (('-d', '--delta'),
         dict(dest='delta', type='int', default=0,
              help='Difference between master and replica db')),
        (('--to',),
         dict(dest='to', type='string', default='',
              help='Comma separated Email notification TO recipients.')),
        (('--cc',),
         dict(dest='cc', type='string', default='',
              help='Comma separated Email notification CC recipients.')),
        (('-t', '--test-mode'),
         dict(dest='testmode', action='store_true', default=False,
              help='skip common group email')),
    )
    opt_parser = optparse.OptionParser()
    for flag_names, settings in option_specs:
        opt_parser.add_option(*flag_names, **settings)
    parsed, _unused_args = opt_parser.parse_args()
    return parsed
| 51 | |
| 52 | |
def FetchMasterResult():
    """Run 'show master status;' against the master database.

    Connects to DATABASE_HOST with the configured user, password and
    database name.

    Returns:
        Whatever fetchone() yields on a DictCursor for the query: the first
        result row, or None when the query produced no row.
    """
    master_conn = MySQLdb.connect(host=DATABASE_HOST,
                                  user=DATABASE_USER,
                                  passwd=DATABASE_PASSWORD,
                                  db=DATABASE_NAME)
    try:
        cursor = master_conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute('show master status;')
        return cursor.fetchone()
    finally:
        # Close even when the query raises so the connection is not leaked.
        master_conn.close()
| 63 | |
| 64 | |
def FetchSlaveResult():
    """Run 'show slave status;' against the replica database.

    Connects to REPLICA_DATABASE_HOST with the configured user, password and
    database name.

    Returns:
        Whatever fetchone() yields on a DictCursor for the query: the first
        result row, or None when the query produced no row.
    """
    replica_conn = MySQLdb.connect(host=REPLICA_DATABASE_HOST,
                                   user=DATABASE_USER,
                                   passwd=DATABASE_PASSWORD,
                                   db=DATABASE_NAME)
    try:
        cursor = replica_conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute('show slave status;')
        return cursor.fetchone()
    finally:
        # Close even when the query raises so the connection is not leaked.
        replica_conn.close()
| 75 | |
| 76 | |
def RunChecks(options, master_result, slave_result):
    """Compare master and replica status and name the first problem found.

    Args:
        options: Object with an integer `delta` attribute, the largest
            tolerated difference between the two log positions.
        master_result: Mapping holding the 'Position' key from
            'show master status', or None/empty when no row was returned.
        slave_result: Mapping holding the 'show slave status' fields read
            below, or None/empty when no row was returned.

    Returns:
        A short string describing the first failed check, or None when every
        check passes.
    """
    # A status query that returned no row cannot be inspected; report it as
    # a problem instead of failing on the subscript below.
    if not master_result:
        return 'MASTER status unavailable'
    if not slave_result:
        return 'SLAVE status unavailable'

    # NOTE(review): positions are compared without checking that both refer
    # to the same binary log file -- confirm whether that matters here.
    master_pos = master_result['Position']
    slave_pos = slave_result['Read_Master_Log_Pos']
    if (master_pos - slave_pos) > options.delta:
        return 'DELTA EXCEEDED: master=%s, slave=%s' % (master_pos, slave_pos)

    # (field, value it must have); checked in this order, first mismatch wins.
    expected_fields = (
        ('Last_SQL_Error', ''),
        ('Slave_IO_State', 'Waiting for master to send event'),
        ('Last_IO_Error', ''),
        ('Slave_SQL_Running', 'Yes'),
        ('Slave_IO_Running', 'Yes'),
    )
    for field, healthy_value in expected_fields:
        if slave_result[field] != healthy_value:
            return 'SLAVE %s' % field
    return None
| 93 | |
| 94 | |
def ShowStatus(options, master_result, slave_result, msg):
    """Report an out-of-sync condition on stdout or by email.

    When options.to is empty, the summary and both raw status results are
    printed. Otherwise a plain-text message is piped to
    '/usr/sbin/sendmail -t'; every name in options.to / options.cc gets
    '@google.com' appended, and the shared alerts address is added to Cc when
    running as SYSTEM_USER outside test mode.

    Args:
        options: Parsed options; the `to`, `cc` and `testmode` attributes
            are read.
        master_result: Result of the master status query, shown verbatim.
        slave_result: Result of the slave status query, shown verbatim.
        msg: Short description of the detected problem, appended to the
            summary line.
    """
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and also parses on Python 3.
    summary = 'Master (%s) and slave (%s) databases are out of sync. %s' % (
        DATABASE_HOST, REPLICA_DATABASE_HOST, msg)
    if not options.to:
        print(summary)
        print('Master status:')
        print(str(master_result))
        print('Slave status:')
        print(str(slave_result))
        return

    email_to = ['%s@google.com' % to.strip() for to in options.to.split(',')]
    email_cc = []
    if options.cc:
        email_cc.extend(
            '%s@google.com' % cc.strip() for cc in options.cc.split(','))
    if getpass.getuser() == SYSTEM_USER and not options.testmode:
        email_cc.append('chromeos-build-alerts+db-replica-checker@google.com')

    body = ('%s\n\n'
            'Master (%s) status:\n%s\n\n'
            'Slave (%s) status:\n%s' % (summary, DATABASE_HOST, master_result,
                                        REPLICA_DATABASE_HOST, slave_result))

    headers = ['To: %s' % ','.join(email_to)]
    if email_cc:
        headers.append('Cc: %s' % ','.join(email_cc))
    headers.append('Subject: Inconsistency detected in cautotest DB replica '
                   'on %s.' % socket.gethostname())
    headers.append('Content-Type: text/plain')

    p = os.popen('/usr/sbin/sendmail -t', 'w')
    try:
        # Each header ends with a newline; the extra newline produces the
        # empty line that separates the headers from the body.
        p.write('\n'.join(headers) + '\n\n')
        p.write(body)
        p.write('\n')
    finally:
        # Always close the pipe, even if a write fails.
        return_code = p.close()
    if return_code is not None:
        print('Sendmail exit status %s' % return_code)
| 130 | |
| 131 | |
def main():
    """Query both databases, run the checks and report any problem.

    Returns normally when the checks find nothing; otherwise the problem is
    reported through ShowStatus and the process exits with status -1.
    """
    opts = ParseOptions()
    master_status = FetchMasterResult()
    replica_status = FetchSlaveResult()
    problem = RunChecks(opts, master_status, replica_status)
    if not problem:
        return
    ShowStatus(opts, master_status, replica_status, problem)
    sys.exit(-1)


if __name__ == '__main__':
    main()