blob: aeeb63dcbd89ee07c6f224878e2a3d59678fca8f [file] [log] [blame]
Shuqian Zhao31985862016-09-01 16:20:01 -07001#!/usr/bin/python
2
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Queries a MySQL database and emits status metrics to Monarch.
8
Note: confusingly, 'Innodb_buffer_pool_reads' is actually the cache-misses, not
the number of reads to the buffer pool. 'Innodb_buffer_pool_read_requests'
corresponds to the number of reads to the buffer pool.
12"""
Paul Hobbsed83f292017-02-24 22:57:19 -080013import logging
14import sys
Shuqian Zhao31985862016-09-01 16:20:01 -070015
16import MySQLdb
17import time
18
19import common
20
Shuqian Zhao31985862016-09-01 16:20:01 -070021from autotest_lib.client.common_lib import global_config
Paul Hobbse86ccd82017-03-24 19:39:24 -070022from autotest_lib.client.common_lib.cros import retry
Dan Shi5e2efb72017-02-07 11:40:23 -080023
Prathmesh Prabhuae7bb7e2018-02-27 11:29:10 -080024from chromite.lib import metrics
25from chromite.lib import ts_mon_config
Shuqian Zhao31985862016-09-01 16:20:01 -070026
# Autotest directory path.
AT_DIR = '/usr/local/autotest'

# MySQL credentials, looked up in the 'CROS' section of the autotest global
# config; each lookup passes default='' as its fallback.
DEFAULT_USER = global_config.global_config.get_config_value(
        'CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = global_config.global_config.get_config_value(
        'CROS', 'db_backup_password', type=str, default='')

# Target period, in seconds, of one query-and-emit cycle (see QueryLoop).
LOOP_INTERVAL = 60
Paul Hobbse86ccd82017-03-24 19:39:24 -070034
# Status variables emitted as counters: QueryAndEmit reports each one as the
# difference between its current value and the baseline taken at startup.
EMITTED_STATUSES_COUNTERS = [
    'bytes_received',
    'bytes_sent',
    'connections',
    'Innodb_buffer_pool_read_requests',
    'Innodb_buffer_pool_reads',
    'Innodb_row_lock_waits',
    'questions',
    'slow_queries',
    'threads_created',
]

# Status variables emitted as gauges: QueryAndEmit reports the value exactly
# as read, with no baseline subtracted.
EMITTED_STATUS_GAUGES = [
    'Innodb_row_lock_time_avg',
    'Innodb_row_lock_current_waits',
    'threads_running',
    'threads_connected',
]
Paul Hobbsd4abbf42016-09-16 16:52:24 -070053
Paul Hobbs7bc6b342016-09-20 12:13:53 -070054
class RetryingConnection(object):
    """Maintains a db connection and a cursor, reconnecting on failure.

    The positional and keyword arguments given to the constructor are stored
    and passed unchanged to MySQLdb.connect() on every (re)connect.
    """
    # Delay handed to retry.retry between attempts, in seconds.
    INITIAL_SLEEP_SECONDS = 20
    # Upper bound on the total time spent retrying, in seconds.
    MAX_TIMEOUT_SECONDS = 60 * 60

    def __init__(self, *args, **kwargs):
        """Stores the connection arguments; does not connect yet.

        @param args: Positional arguments for MySQLdb.connect().
        @param kwargs: Keyword arguments for MySQLdb.connect().
        """
        self.args = args
        self.kwargs = kwargs
        self.db = None
        self.cursor = None

    def Connect(self):
        """Establishes a MySQL connection and creates a cursor."""
        self.db = MySQLdb.connect(*self.args, **self.kwargs)
        self.cursor = self.db.cursor()

    def Reconnect(self):
        """Attempts to close the connection, then reconnects.

        The cursor and the connection are closed independently, so a failure
        to close the cursor no longer skips closing the connection. Resources
        that were never created (still None) are skipped instead of raising
        AttributeError.
        """
        for resource in (self.cursor, self.db):
            if resource is None:
                continue
            try:
                resource.close()
            except MySQLdb.Error:
                # Best effort only: we are about to replace the connection.
                pass
        self.Connect()

    def RetryWith(self, func):
        """Run a function, retrying on OperationalError.

        @param func: Zero-argument callable to run.
        @returns Whatever |func| returns.
        """
        return retry.retry(
                MySQLdb.OperationalError,
                delay_sec=self.INITIAL_SLEEP_SECONDS,
                # retry.retry() expects minutes; the class constant is in
                # seconds, so convert (float division for Python 2).
                timeout_min=self.MAX_TIMEOUT_SECONDS / 60.0,
                callback=self.Reconnect
        )(func)()

    def Execute(self, *args, **kwargs):
        """Runs .execute on the cursor, reconnecting on failure.

        @param args: Positional arguments for cursor.execute().
        @param kwargs: Keyword arguments for cursor.execute().
        @returns The result of cursor.execute().
        """
        def _Execute():
            # self.cursor is looked up on every attempt so that a cursor
            # replaced by Reconnect() is picked up.
            return self.cursor.execute(*args, **kwargs)
        return self.RetryWith(_Execute)

    def Fetchall(self):
        """Runs .fetchall on the cursor."""
        return self.cursor.fetchall()
98
Shuqian Zhao31985862016-09-01 16:20:01 -070099
def GetStatus(connection, status):
    """Get the status variable from the database.

    The query is issued through connection.Execute(), so any retrying is
    whatever that method provides (RetryingConnection retries on
    OperationalError).

    @param connection: Connection wrapper exposing Execute() and Fetchall(),
            e.g. a RetryingConnection.
    @param status: Name of the status variable.
    @returns The value of the status variable, converted to int.
    @raises ValueError: If the query returned no row or an empty value for
            |status|, or if the value cannot be parsed as an integer.
    """
    connection.Execute('SHOW GLOBAL STATUS LIKE "%s";' % status)
    rows = connection.Fetchall()

    # Only the first matching row is used; its second column holds the value.
    value = rows[0][1] if rows else None

    # Compare explicitly rather than by truthiness so that a numeric 0 is not
    # mistaken for a missing value.
    if value is None or value == '':
        logging.error('Cannot find any global status like %s', status)
        raise ValueError('No global status like %r' % (status,))

    return int(value)
Shuqian Zhao31985862016-09-01 16:20:01 -0700114
115
def QueryAndEmit(baselines, conn):
    """Reads the tracked MySQL status variables and emits them as metrics.

    Counters are emitted as the difference from their baseline; gauges are
    emitted as read. The buffer pool page counts are emitted on a single
    gauge, split by the boolean 'used' field.

    @param baselines: A dict mapping each name in EMITTED_STATUSES_COUNTERS
            to the initial value of that cumulative metric.
    @param conn: The mysql connection object.
    """
    name_template = 'chromeos/autotest/afe_db/%s'

    for counter in EMITTED_STATUSES_COUNTERS:
        counter_name = name_template % counter.lower()
        since_baseline = GetStatus(conn, counter) - baselines[counter]
        metrics.Counter(counter_name).set(since_baseline)

    for gauge in EMITTED_STATUS_GAUGES:
        gauge_name = name_template % gauge.lower()
        metrics.Gauge(gauge_name).set(GetStatus(conn, gauge))

    free_pages = GetStatus(conn, 'Innodb_buffer_pool_pages_free')
    total_pages = GetStatus(conn, 'Innodb_buffer_pool_pages_total')
    used_pages = total_pages - free_pages

    # Same metric name for both values; the 'used' field tells them apart.
    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
            free_pages, fields={'used': False})
    metrics.Gauge('chromeos/autotest/afe_db/buffer_pool_pages').set(
            used_pages, fields={'used': True})
140
141
def main():
    """Configures logging, connects to MySQL and runs the metrics loop."""
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    connection = RetryingConnection('localhost', DEFAULT_USER, DEFAULT_PASSWD)
    connection.Connect()

    # TODO(crbug.com/803566) Use indirect=False to mitigate orphan mysql_stats
    # processes overwhelming shards.
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            'mysql_stats', indirect=False)
    with ts_mon_state:
        QueryLoop(connection)
152
153
def QueryLoop(conn):
    """Emits metrics forever, aiming for one cycle per LOOP_INTERVAL seconds.

    @param conn: The mysql connection object.
    """
    # Record where every cumulative counter stands right now. Without these
    # baselines the windowed rate would spike at startup, as each counter
    # would appear to jump from 0 to its current value.
    baselines = {}
    for counter in EMITTED_STATUSES_COUNTERS:
        baselines[counter] = GetStatus(conn, counter)

    while True:
        started = time.time()
        QueryAndEmit(baselines, conn)
        elapsed = time.time() - started
        # Sleep only for what is left of the interval; never a negative time.
        remaining = LOOP_INTERVAL - elapsed
        time.sleep(max(0, remaining))
Paul Hobbse86ccd82017-03-24 19:39:24 -0700171
172
# Run the metrics loop only when executed as a script, not when imported.
if __name__ == '__main__':
    main()