Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | |
| 3 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Queries a MySQL database and emits status metrics to Monarch. |
| 8 | |
| 9 | Note: confusingly, 'Innodb_buffer_pool_reads' is actually the cache-misses, not |
| 10 | the number of reads to the buffer pool. 'Innodb_buffer_pool_read_requests' |
| 11 | corresponds to the number of reads to the buffer pool. |
| 12 | """ |
Paul Hobbs | ed83f29 | 2017-02-24 22:57:19 -0800 | [diff] [blame] | 13 | import logging |
| 14 | import sys |
Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 15 | |
| 16 | import MySQLdb |
| 17 | import time |
| 18 | |
| 19 | import common |
| 20 | |
Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 21 | from autotest_lib.client.common_lib import global_config |
Paul Hobbs | e86ccd8 | 2017-03-24 19:39:24 -0700 | [diff] [blame] | 22 | from autotest_lib.client.common_lib.cros import retry |
Dan Shi | 5e2efb7 | 2017-02-07 11:40:23 -0800 | [diff] [blame] | 23 | |
Prathmesh Prabhu | ae7bb7e | 2018-02-27 11:29:10 -0800 | [diff] [blame] | 24 | from chromite.lib import metrics |
| 25 | from chromite.lib import ts_mon_config |
Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 26 | |
AT_DIR = '/usr/local/autotest'

# Database credentials, read from the CROS section of the global config.
# Both fall back to the empty string when the option is not set.
DEFAULT_USER = global_config.global_config.get_config_value(
        'CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = global_config.global_config.get_config_value(
        'CROS', 'db_backup_password', type=str, default='')

# Target period, in seconds, of one query-and-emit pass in QueryLoop().
LOOP_INTERVAL = 60

# Status variables emitted as counters, relative to a baseline sampled when
# the query loop starts.
EMITTED_STATUSES_COUNTERS = [
    'bytes_received',
    'bytes_sent',
    'connections',
    'Innodb_buffer_pool_read_requests',
    'Innodb_buffer_pool_reads',
    'Innodb_row_lock_waits',
    'questions',
    'slow_queries',
    'threads_created',
]

# Status variables emitted as gauges, i.e. reported with their current value.
EMITTED_STATUS_GAUGES = [
    'Innodb_row_lock_time_avg',
    'Innodb_row_lock_current_waits',
    'threads_running',
    'threads_connected',
]
Paul Hobbs | d4abbf4 | 2016-09-16 16:52:24 -0700 | [diff] [blame] | 53 | |
Paul Hobbs | 7bc6b34 | 2016-09-20 12:13:53 -0700 | [diff] [blame] | 54 | |
class RetryingConnection(object):
    """Maintains a db connection and a cursor, reconnecting on failure.

    The positional and keyword arguments given to the constructor are stored
    and forwarded unchanged to MySQLdb.connect() on every (re)connect.
    """
    # Passed to retry.retry() as delay_sec.
    INITIAL_SLEEP_SECONDS = 20
    # Overall retry budget, in seconds (one hour).
    MAX_TIMEOUT_SECONDS = 60 * 60

    def __init__(self, *args, **kwargs):
        """Stores the connection arguments; does not connect yet.

        @param args: Positional arguments for MySQLdb.connect().
        @param kwargs: Keyword arguments for MySQLdb.connect().
        """
        self.args = args
        self.kwargs = kwargs
        self.db = None
        self.cursor = None

    def Connect(self):
        """Establishes a MySQL connection and creates a cursor."""
        self.db = MySQLdb.connect(*self.args, **self.kwargs)
        self.cursor = self.db.cursor()

    @staticmethod
    def _CloseQuietly(resource):
        """Closes a cursor/connection, ignoring MySQL errors.

        @param resource: Object with a close() method, or None if nothing
                has been opened yet.
        """
        if resource is None:
            return
        try:
            resource.close()
        except MySQLdb.Error:
            # Best effort: the resource is being discarded anyway.
            pass

    def Reconnect(self):
        """Attempts to close the connection, then reconnects."""
        # Close the cursor and the connection independently so that a failure
        # to close the cursor does not leave the old connection open.
        self._CloseQuietly(self.cursor)
        self._CloseQuietly(self.db)
        # NOTE(review): an exception raised by Connect() is not handled here
        # and propagates to whoever invoked Reconnect().
        self.Connect()

    def RetryWith(self, func):
        """Run a function, retrying on OperationalError.

        @param func: Zero-argument callable to run.
        @returns The result of the retry-wrapped call.
        """
        retrying = retry.retry(
                MySQLdb.OperationalError,
                delay_sec=self.INITIAL_SLEEP_SECONDS,
                # The keyword is named in minutes while the class constant is
                # in seconds; convert so the budget is one hour rather than
                # 3600 minutes.
                timeout_min=self.MAX_TIMEOUT_SECONDS / 60.0,
                callback=self.Reconnect)
        return retrying(func)()

    def Execute(self, *args, **kwargs):
        """Runs .execute on the cursor, reconnecting on failure.

        @param args: Positional arguments for cursor.execute().
        @param kwargs: Keyword arguments for cursor.execute().
        @returns The result of the retry-wrapped cursor.execute() call.
        """
        def _Execute():
            # Look up self.cursor on every attempt: Reconnect() replaces it.
            return self.cursor.execute(*args, **kwargs)
        return self.RetryWith(_Execute)

    def Fetchall(self):
        """Runs .fetchall on the cursor.

        @returns Whatever cursor.fetchall() returns.
        """
        return self.cursor.fetchall()
| 98 | |
Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 99 | |
def GetStatus(connection, status):
    """Get the status variable from the database.

    Any retrying on failure is whatever connection.Execute() provides.

    @param connection: Connection wrapper exposing Execute() and Fetchall(),
            e.g. a RetryingConnection.
    @param status: Name of the status variable.
    @returns The value of the status variable, converted to an int.
    @raises IndexError: If the query returned no rows for the status.
    """
    connection.Execute('SHOW GLOBAL STATUS LIKE "%s";' % status)
    rows = connection.Fetchall()

    if not rows:
        # Without this check the indexing below would fail with a bare
        # "index out of range" before anything useful could be logged.
        logging.error('Cannot find any global status like %s', status)
        raise IndexError('No global status row returned for %r' % (status,))

    # Each row is indexed as (name, value); only the first match is used.
    output = rows[0][1]
    if not output:
        logging.error('Cannot find any global status like %s', status)

    return int(output)
Shuqian Zhao | 3198586 | 2016-09-01 16:20:01 -0700 | [diff] [blame] | 114 | |
| 115 | |
def QueryAndEmit(baselines, conn):
    """Queries MySQL for important stats and emits Monarch metrics

    Counters are emitted as the difference between the current value and the
    corresponding baseline; gauges are emitted with their current value. The
    InnoDB buffer pool page counts are emitted as a single gauge split by a
    boolean 'used' field.

    @param baselines: A dict containing the initial values for the cumulative
                      metrics, keyed by status name.
    @param conn: The mysql connection object.
    """
    def _MetricName(status_name):
        """Builds the metric path for a status variable name."""
        return 'chromeos/autotest/afe_db/%s' % status_name.lower()

    for counter_status in EMITTED_STATUSES_COUNTERS:
        counter_path = _MetricName(counter_status)
        current_value = GetStatus(conn, counter_status)
        increase = current_value - baselines[counter_status]
        metrics.Counter(counter_path).set(increase)

    for gauge_status in EMITTED_STATUS_GAUGES:
        gauge = metrics.Gauge(_MetricName(gauge_status))
        gauge.set(GetStatus(conn, gauge_status))

    free_pages = GetStatus(conn, 'Innodb_buffer_pool_pages_free')
    all_pages = GetStatus(conn, 'Innodb_buffer_pool_pages_total')
    used_pages = all_pages - free_pages

    # Same metric path for both samples; the 'used' field tells them apart.
    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
            free_pages, fields={'used': False})
    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
            used_pages, fields={'used': True})
| 140 | |
| 141 | |
def main():
    """Sets up ts_mon and repeatedly queries MySQL stats"""
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    # Connect to the local MySQL server before entering the ts_mon context.
    connection = RetryingConnection('localhost', DEFAULT_USER, DEFAULT_PASSWD)
    connection.Connect()

    # TODO(crbug.com/803566) Use indirect=False to mitigate orphan mysql_stats
    # processes overwhelming shards.
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            'mysql_stats', indirect=False)
    with ts_mon_state:
        QueryLoop(connection)
| 152 | |
| 153 | |
def QueryLoop(conn):
    """Queries and emits metrics every LOOP_INTERVAL seconds.

    Runs forever; each pass sleeps for whatever is left of LOOP_INTERVAL
    after the queries, and does not sleep at all if the pass overran.

    @param conn: The mysql connection object.
    """
    # Sample the cumulative metrics once up front. Without a baseline, the
    # windowed rate at the very beginning would be extremely high as the
    # counter jumps from 0 to its current value.
    baselines = {}
    for counter_status in EMITTED_STATUSES_COUNTERS:
        baselines[counter_status] = GetStatus(conn, counter_status)

    while True:
        started_at = time.time()
        QueryAndEmit(baselines, conn)
        elapsed = time.time() - started_at
        remaining = LOOP_INTERVAL - elapsed
        time.sleep(max(0, remaining))
Paul Hobbs | e86ccd8 | 2017-03-24 19:39:24 -0700 | [diff] [blame] | 171 | |
| 172 | |
# Run the stats loop only when executed as a script, not when imported.
if __name__ == '__main__':
    main()