blob: 11b867914534ac2a26a09eff82deaa49be0b8c19 [file] [log] [blame]
Mike Frysingerd03e6b52019-08-03 12:49:01 -04001#!/usr/bin/python2
Chris Sosa621509d2012-04-04 16:02:52 -07002
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module used to back up the mysql db and upload to Google Storage.
8
9Usage:
10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
11
12 gs_bucket may refer to a local location by omitting gs:// and giving a local
13 path if desired for testing. The example usage above creates a dump
14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
15 cleans up older dumps if there are more than 10 in that directory.
16"""
17
18import datetime
19from distutils import version
20import logging
21import optparse
22import os
23import tempfile
24
25import common
26
27from autotest_lib.client.common_lib import error
Shuqian Zhao3fc87b72017-09-28 15:40:28 -070028from autotest_lib.client.common_lib import global_config
29from autotest_lib.client.common_lib import logging_manager
30from autotest_lib.client.common_lib import utils
Chris Sosa621509d2012-04-04 16:02:52 -070031from autotest_lib.utils import test_importer
32
Shuqian Zhao3fc87b72017-09-28 15:40:28 -070033from chromite.lib import metrics
34from chromite.lib import ts_mon_config
Chris Sosa621509d2012-04-04 16:02:52 -070035
36_ATTEMPTS = 3
37_GSUTIL_BIN = 'gsutil'
Scott Zawalski6a6c9d12012-04-16 13:03:07 -040038_GS_BUCKET = 'gs://chromeos-lab/backup/database'
Scott Zawalski6ccf2842012-08-31 20:45:43 -040039# TODO(scottz): Should we need to ignore more than one database a general
40# function should be designed that lists tables in the database and properly
41# creates the --ignore-table= args to be passed to mysqldump.
42# Tables to ignore when dumping all databases.
43# performance_schema is an internal database that cannot be dumped
44IGNORE_TABLES = ['performance_schema.cond_instances',
45 'performance_schema.events_waits_current',
46 'performance_schema.cond_instances',
47 'performance_schema.events_waits_history',
48 'performance_schema.events_waits_history_long',
49 'performance_schema.events_waits_summary_by_instance',
50 ('performance_schema.'
51 'events_waits_summary_by_thread_by_event_name'),
52 'performance_schema.events_waits_summary_global_by_event_name',
53 'performance_schema.file_instances',
54 'performance_schema.file_summary_by_event_name',
55 'performance_schema.file_summary_by_instance',
56 'performance_schema.mutex_instances',
57 'performance_schema.performance_timers',
58 'performance_schema.rwlock_instances',
59 'performance_schema.setup_consumers',
60 'performance_schema.setup_instruments',
61 'performance_schema.setup_timers',
62 'performance_schema.threads']
Prashanth B064c9492014-09-06 15:29:32 -070063
64# Conventional mysqldump schedules.
Chris Sosa621509d2012-04-04 16:02:52 -070065_DAILY = 'daily'
66_WEEKLY = 'weekly'
67_MONTHLY = 'monthly'
Prashanth B064c9492014-09-06 15:29:32 -070068
Jacob Kopczynski28718872018-08-20 14:54:44 -070069# Dump of server db only
Fang Deng4e6ba492015-12-11 15:44:04 -080070_SERVER_DB = 'server_db'
71
Prashanth B064c9492014-09-06 15:29:32 -070072# Contrary to a conventional mysql dump which takes O(hours) on large databases,
73# a host dump is the cheapest form of backup possible. We dump the output of a
74# of a mysql command showing all hosts and their pool labels to a text file that
75# is backed up to google storage.
76_ONLY_HOSTS = 'only_hosts'
Jakob Jueliche0e9c932014-09-24 14:40:48 -070077_ONLY_SHARDS = 'only_shards'
Jacob Kopczynski8a329a92019-06-03 12:26:35 -070078_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS,
Jacob Kopczynski1a05dc02018-09-02 09:30:42 -070079 _DAILY, _WEEKLY, _MONTHLY]
Fang Deng4e6ba492015-12-11 15:44:04 -080080
81class BackupError(Exception):
82 """Raised for error occurred during backup."""
Chris Sosa621509d2012-04-04 16:02:52 -070083
84
85class MySqlArchiver(object):
86 """Class that archives the Autotest MySQL DB to Google Storage.
87
88 Vars:
89 gs_dir: The path to the directory in Google Storage that this dump file
90 will be uploaded to.
91 number_to_keep: The number of dumps we should store.
92 """
Fang Deng4e6ba492015-12-11 15:44:04 -080093 _AUTOTEST_DB = "chromeos_autotest_db"
94 _SERVER_DB = "chromeos_lab_servers"
Chris Sosa621509d2012-04-04 16:02:52 -070095
96
97 def __init__(self, scheduled_type, number_to_keep, gs_bucket):
Fang Deng4e6ba492015-12-11 15:44:04 -080098 # For conventional scheduled type, we back up all databases.
99 # self._db is only used when scheduled_type is not
100 # conventional scheduled type.
101 self._db = self._get_db_name(scheduled_type)
Chris Sosa621509d2012-04-04 16:02:52 -0700102 self._gs_dir = '/'.join([gs_bucket, scheduled_type])
103 self._number_to_keep = number_to_keep
Prashanth B064c9492014-09-06 15:29:32 -0700104 self._type = scheduled_type
Chris Sosa621509d2012-04-04 16:02:52 -0700105
106
Fang Deng4e6ba492015-12-11 15:44:04 -0800107 @classmethod
108 def _get_db_name(cls, scheduled_type):
109 """Get the db name to backup.
110
111 @param scheduled_type: one of _SCHEDULER_TYPES.
112
113 @returns: The name of the db to backup.
114 Or None for backup all dbs.
115 """
116 if scheduled_type == _SERVER_DB:
117 return cls._SERVER_DB
118 elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
119 return cls._AUTOTEST_DB
120 else:
121 return None
122
Chris Sosa621509d2012-04-04 16:02:52 -0700123 @staticmethod
124 def _get_user_pass():
125 """Returns a tuple containing the user/pass to use to access the DB."""
126 user = global_config.global_config.get_config_value(
127 'CROS', 'db_backup_user')
128 password = global_config.global_config.get_config_value(
129 'CROS', 'db_backup_password')
130 return user, password
131
132
133 def create_mysql_dump(self):
134 """Returns the path to a mysql dump of the current autotest DB."""
135 user, password = self._get_user_pass()
136 _, filename = tempfile.mkstemp('autotest_db_dump')
137 logging.debug('Dumping mysql database to file %s', filename)
Scott Zawalski6ccf2842012-08-31 20:45:43 -0400138 extra_dump_args = ''
139 for entry in IGNORE_TABLES:
140 extra_dump_args += '--ignore-table=%s ' % entry
Jacob Kopczynski93cf7d62018-08-20 14:32:30 -0700141 if self._type in [_WEEKLY, _MONTHLY]:
Jacob Kopczynski28718872018-08-20 14:54:44 -0700142 extra_dump_args += '--dump-slave '
Scott Zawalski6ccf2842012-08-31 20:45:43 -0400143
Fang Deng4e6ba492015-12-11 15:44:04 -0800144 if not self._db:
145 extra_dump_args += "--all-databases"
146 db_name = self._db or ''
147 utils.system('set -o pipefail; mysqldump --user=%s '
148 '--password=%s %s %s| gzip - > %s' % (
149 user, password, extra_dump_args, db_name, filename))
Chris Sosa621509d2012-04-04 16:02:52 -0700150 return filename
151
152
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700153 def _create_dump_from_query(self, query):
154 """Dumps result of a query into a text file.
155
156 @param query: Query to execute.
157
158 @return: The path to a tempfile containing the response of the query.
159 """
Fang Deng4e6ba492015-12-11 15:44:04 -0800160 if not self._db:
161 raise BackupError("_create_dump_from_query requires a specific db.")
162 parameters = {'db': self._db, 'query': query}
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700163 parameters['user'], parameters['password'] = self._get_user_pass()
164 _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
165 utils.system(
166 'set -o pipefail; mysql -u %(user)s -p%(password)s '
Fang Deng4e6ba492015-12-11 15:44:04 -0800167 '%(db)s -e "%(query)s" > %(filename)s' %
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700168 parameters)
169 return parameters['filename']
170
171
Prashanth B064c9492014-09-06 15:29:32 -0700172 def create_host_dump(self):
173 """Dumps hosts and their labels into a text file.
174
175 @return: The path to a tempfile containing a dump of
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700176 hosts and their pool labels.
Prashanth B064c9492014-09-06 15:29:32 -0700177 """
Xixuan Wuc7049db2017-12-18 16:52:01 -0800178 respect_static_labels = global_config.global_config.get_config_value(
179 'SKYLAB', 'respect_static_labels', type=bool, default=False)
180 template = ('SELECT hosts.hostname, labels.name FROM afe_hosts AS '
181 'hosts JOIN %(hosts_labels_table)s AS hlt ON '
182 'hosts.id = hlt.host_id '
183 'JOIN %(labels_table)s AS labels '
184 'ON labels.id = hlt.%(column)s '
185 'WHERE labels.name LIKE \'%%pool%%\';')
186 if respect_static_labels:
187 # HACK: We're not checking the replaced_by_static_label on the
188 # pool label and just hard coding the fact that pool labels are
189 # indeed static labels. Expedience.
190 query = template % {
191 'hosts_labels_table': 'afe_static_hosts_labels',
192 'labels_table': 'afe_static_labels',
193 'column': 'staticlabel_id',
194 }
195 else:
196 query = template % {
197 'hosts_labels_table': 'afe_hosts_labels',
198 'labels_table': 'afe_labels',
199 'column': 'label_id',
200 }
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700201 return self._create_dump_from_query(query)
202
203
204 def create_shards_dump(self):
205 """Dumps shards and their labels into a text file.
206
207 @return: The path to a tempfile containing a dump of
208 shards and their labels.
209 """
210 query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
211 'JOIN afe_shards_labels '
212 'ON shards.id = afe_shards_labels.shard_id '
213 'JOIN afe_labels AS labels '
214 'ON labels.id = afe_shards_labels.label_id;')
215 return self._create_dump_from_query(query)
Prashanth B064c9492014-09-06 15:29:32 -0700216
217
218 def dump(self):
219 """Creates a data dump based on the type of schedule.
220
221 @return: The path to a file containing the dump.
222 """
223 if self._type == _ONLY_HOSTS:
224 return self.create_host_dump()
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700225 if self._type == _ONLY_SHARDS:
226 return self.create_shards_dump()
Prashanth B064c9492014-09-06 15:29:32 -0700227 return self.create_mysql_dump()
228
229
230 def _get_name(self):
231 """Returns the name of the dump as presented to google storage."""
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700232 if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
233 file_type = 'txt'
234 else:
235 file_type = 'gz'
Prashanth B064c9492014-09-06 15:29:32 -0700236 return 'autotest-dump.%s.%s' % (
237 datetime.datetime.now().strftime('%y.%m.%d'), file_type)
Chris Sosa621509d2012-04-04 16:02:52 -0700238
239
240 @staticmethod
241 def _retry_run(cmd):
242 """Run the specified |cmd| string, retrying if necessary.
243
244 Args:
245 cmd: The command to run.
246 """
247 for attempt in range(_ATTEMPTS):
248 try:
249 return utils.system_output(cmd)
250 except error.CmdError:
251 if attempt == _ATTEMPTS - 1:
252 raise
253 else:
254 logging.error('Failed to run %r', cmd)
255
256
257 def upload_to_google_storage(self, dump_file):
Prashanth B064c9492014-09-06 15:29:32 -0700258 """Uploads the given |dump_file| to Google Storage.
259
260 @param dump_file: The path to the file containing the dump.
261 """
Chris Sosa621509d2012-04-04 16:02:52 -0700262 cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
263 input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
264 name=self._get_name(), gs_dir=self._gs_dir)
265 cmd = cmd % input_dict
266 logging.debug('Uploading mysql dump to google storage')
267 self._retry_run(cmd)
268 os.remove(dump_file)
269
270
271 def _get_gs_command(self, cmd):
272 """Returns an array representing the command for rm or ls."""
273 # Helpful code to allow us to test without gs.
274 assert cmd in ['rm', 'ls']
275 gs_bin = _GSUTIL_BIN
Scott Zawalski6a6c9d12012-04-16 13:03:07 -0400276 if self._gs_dir.startswith('gs://'):
277 cmd_array = [gs_bin, cmd]
278 else:
279 cmd_array = [cmd]
Chris Sosa621509d2012-04-04 16:02:52 -0700280
281 return cmd_array
282
283
284 def _do_ls(self):
285 """Returns the output of running ls on the gs bucket."""
286 cmd = self._get_gs_command('ls') + [self._gs_dir]
287 return self._retry_run(' '.join(cmd))
288
289
290 def cleanup(self):
291 """Cleans up the gs bucket to ensure we don't over archive."""
292 logging.debug('Cleaning up previously archived dump files.')
293 listing = self._do_ls()
294 ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
295 if len(ordered_listing) < self._number_to_keep:
296 logging.debug('Cleanup found nothing to do.')
297 return
298
299 to_remove = ordered_listing[:-self._number_to_keep]
300 rm_cmd = self._get_gs_command('rm')
301 for artifact in to_remove:
Shuqian Zhao3fc87b72017-09-28 15:40:28 -0700302 cmd = ' '.join(rm_cmd + [artifact])
Chris Sosa621509d2012-04-04 16:02:52 -0700303 self._retry_run(cmd)
304
305
306def parse_options():
307 """Parses given options."""
308 parser = optparse.OptionParser()
309 parser.add_option('--gs_bucket', default=_GS_BUCKET,
310 help='Google storage bucket to store mysql db dumps.')
311 parser.add_option('--keep', default=10, type=int,
312 help='Number of dumps to keep of specified type.')
313 parser.add_option('--type', default=_DAILY,
314 help='The type of mysql dump to store.')
315 parser.add_option('--verbose', default=False, action='store_true',
316 help='Google storage bucket to store mysql db dumps.')
317 options = parser.parse_args()[0]
318 if options.type not in _SCHEDULER_TYPES:
319 parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))
320
321 return options
322
323
324def main():
325 """Runs the program."""
326 options = parse_options()
327 logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
328 verbose=options.verbose)
Shuqian Zhao3fc87b72017-09-28 15:40:28 -0700329 backup_succeeded = False
330
331 with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup',
332 indirect=True):
333 with metrics.SecondsTimer(
334 'chromeos/autotest/afe_db/backup/durations',
335 fields={'type': options.type}):
336 try:
337 logging.debug('Start db backup: %s', options.type)
338 archiver = MySqlArchiver(
339 options.type, options.keep, options.gs_bucket)
340 dump_file = archiver.dump()
341 logging.debug('Uploading backup: %s', options.type)
342 archiver.upload_to_google_storage(dump_file)
343 archiver.cleanup()
344 logging.debug('Db backup completed: %s', options.type)
345 backup_succeeded = True
346 finally:
347 metrics.Counter(
348 'chromeos/autotest/db/db_backup/completed').increment(
349 fields={'success': backup_succeeded,
350 'type': options.type})
Chris Sosa621509d2012-04-04 16:02:52 -0700351
352
353if __name__ == '__main__':
354 main()