blob: 64743facfd0fd06ce6ab96f70f61f04df79a632a [file] [log] [blame]
Chris Sosa621509d2012-04-04 16:02:52 -07001#!/usr/bin/python
2
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module used to back up the mysql db and upload to Google Storage.
8
9Usage:
10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
11
12 gs_bucket may refer to a local location by omitting gs:// and giving a local
13 path if desired for testing. The example usage above creates a dump
14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
15 cleans up older dumps if there are more than 10 in that directory.
16"""
17
18import datetime
19from distutils import version
20import logging
21import optparse
22import os
23import tempfile
24
25import common
26
27from autotest_lib.client.common_lib import error
Shuqian Zhao3fc87b72017-09-28 15:40:28 -070028from autotest_lib.client.common_lib import global_config
29from autotest_lib.client.common_lib import logging_manager
30from autotest_lib.client.common_lib import utils
Chris Sosa621509d2012-04-04 16:02:52 -070031from autotest_lib.utils import test_importer
32
Shuqian Zhao3fc87b72017-09-28 15:40:28 -070033from chromite.lib import metrics
34from chromite.lib import ts_mon_config
Chris Sosa621509d2012-04-04 16:02:52 -070035
36_ATTEMPTS = 3
37_GSUTIL_BIN = 'gsutil'
Scott Zawalski6a6c9d12012-04-16 13:03:07 -040038_GS_BUCKET = 'gs://chromeos-lab/backup/database'
Scott Zawalski6ccf2842012-08-31 20:45:43 -040039# TODO(scottz): Should we need to ignore more than one database a general
40# function should be designed that lists tables in the database and properly
41# creates the --ignore-table= args to be passed to mysqldump.
42# Tables to ignore when dumping all databases.
43# performance_schema is an internal database that cannot be dumped
44IGNORE_TABLES = ['performance_schema.cond_instances',
45 'performance_schema.events_waits_current',
46 'performance_schema.cond_instances',
47 'performance_schema.events_waits_history',
48 'performance_schema.events_waits_history_long',
49 'performance_schema.events_waits_summary_by_instance',
50 ('performance_schema.'
51 'events_waits_summary_by_thread_by_event_name'),
52 'performance_schema.events_waits_summary_global_by_event_name',
53 'performance_schema.file_instances',
54 'performance_schema.file_summary_by_event_name',
55 'performance_schema.file_summary_by_instance',
56 'performance_schema.mutex_instances',
57 'performance_schema.performance_timers',
58 'performance_schema.rwlock_instances',
59 'performance_schema.setup_consumers',
60 'performance_schema.setup_instruments',
61 'performance_schema.setup_timers',
62 'performance_schema.threads']
Prashanth B064c9492014-09-06 15:29:32 -070063
64# Conventional mysqldump schedules.
Chris Sosa621509d2012-04-04 16:02:52 -070065_DAILY = 'daily'
66_WEEKLY = 'weekly'
67_MONTHLY = 'monthly'
Prashanth B064c9492014-09-06 15:29:32 -070068
Fang Deng4e6ba492015-12-11 15:44:04 -080069# Back up server db
70_SERVER_DB = 'server_db'
71
Prashanth B064c9492014-09-06 15:29:32 -070072# Contrary to a conventional mysql dump which takes O(hours) on large databases,
73# a host dump is the cheapest form of backup possible. We dump the output of a
74# of a mysql command showing all hosts and their pool labels to a text file that
75# is backed up to google storage.
76_ONLY_HOSTS = 'only_hosts'
Jakob Jueliche0e9c932014-09-24 14:40:48 -070077_ONLY_SHARDS = 'only_shards'
Fang Deng4e6ba492015-12-11 15:44:04 -080078_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY]
79
80class BackupError(Exception):
81 """Raised for error occurred during backup."""
Chris Sosa621509d2012-04-04 16:02:52 -070082
83
84class MySqlArchiver(object):
85 """Class that archives the Autotest MySQL DB to Google Storage.
86
87 Vars:
88 gs_dir: The path to the directory in Google Storage that this dump file
89 will be uploaded to.
90 number_to_keep: The number of dumps we should store.
91 """
Fang Deng4e6ba492015-12-11 15:44:04 -080092 _AUTOTEST_DB = "chromeos_autotest_db"
93 _SERVER_DB = "chromeos_lab_servers"
Chris Sosa621509d2012-04-04 16:02:52 -070094
95
96 def __init__(self, scheduled_type, number_to_keep, gs_bucket):
Fang Deng4e6ba492015-12-11 15:44:04 -080097 # For conventional scheduled type, we back up all databases.
98 # self._db is only used when scheduled_type is not
99 # conventional scheduled type.
100 self._db = self._get_db_name(scheduled_type)
Chris Sosa621509d2012-04-04 16:02:52 -0700101 self._gs_dir = '/'.join([gs_bucket, scheduled_type])
102 self._number_to_keep = number_to_keep
Prashanth B064c9492014-09-06 15:29:32 -0700103 self._type = scheduled_type
Chris Sosa621509d2012-04-04 16:02:52 -0700104
105
Fang Deng4e6ba492015-12-11 15:44:04 -0800106 @classmethod
107 def _get_db_name(cls, scheduled_type):
108 """Get the db name to backup.
109
110 @param scheduled_type: one of _SCHEDULER_TYPES.
111
112 @returns: The name of the db to backup.
113 Or None for backup all dbs.
114 """
115 if scheduled_type == _SERVER_DB:
116 return cls._SERVER_DB
117 elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
118 return cls._AUTOTEST_DB
119 else:
120 return None
121
Chris Sosa621509d2012-04-04 16:02:52 -0700122 @staticmethod
123 def _get_user_pass():
124 """Returns a tuple containing the user/pass to use to access the DB."""
125 user = global_config.global_config.get_config_value(
126 'CROS', 'db_backup_user')
127 password = global_config.global_config.get_config_value(
128 'CROS', 'db_backup_password')
129 return user, password
130
131
132 def create_mysql_dump(self):
133 """Returns the path to a mysql dump of the current autotest DB."""
134 user, password = self._get_user_pass()
135 _, filename = tempfile.mkstemp('autotest_db_dump')
136 logging.debug('Dumping mysql database to file %s', filename)
Scott Zawalski6ccf2842012-08-31 20:45:43 -0400137 extra_dump_args = ''
138 for entry in IGNORE_TABLES:
139 extra_dump_args += '--ignore-table=%s ' % entry
140
Fang Deng4e6ba492015-12-11 15:44:04 -0800141 if not self._db:
142 extra_dump_args += "--all-databases"
143 db_name = self._db or ''
144 utils.system('set -o pipefail; mysqldump --user=%s '
145 '--password=%s %s %s| gzip - > %s' % (
146 user, password, extra_dump_args, db_name, filename))
Chris Sosa621509d2012-04-04 16:02:52 -0700147 return filename
148
149
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700150 def _create_dump_from_query(self, query):
151 """Dumps result of a query into a text file.
152
153 @param query: Query to execute.
154
155 @return: The path to a tempfile containing the response of the query.
156 """
Fang Deng4e6ba492015-12-11 15:44:04 -0800157 if not self._db:
158 raise BackupError("_create_dump_from_query requires a specific db.")
159 parameters = {'db': self._db, 'query': query}
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700160 parameters['user'], parameters['password'] = self._get_user_pass()
161 _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
162 utils.system(
163 'set -o pipefail; mysql -u %(user)s -p%(password)s '
Fang Deng4e6ba492015-12-11 15:44:04 -0800164 '%(db)s -e "%(query)s" > %(filename)s' %
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700165 parameters)
166 return parameters['filename']
167
168
Prashanth B064c9492014-09-06 15:29:32 -0700169 def create_host_dump(self):
170 """Dumps hosts and their labels into a text file.
171
172 @return: The path to a tempfile containing a dump of
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700173 hosts and their pool labels.
Prashanth B064c9492014-09-06 15:29:32 -0700174 """
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700175 query = ('SELECT hostname, labels.name FROM afe_hosts AS hosts '
176 'JOIN afe_hosts_labels ON hosts.id = afe_hosts_labels.host_id '
177 'JOIN afe_labels AS labels '
178 'ON labels.id = afe_hosts_labels.label_id '
179 'WHERE labels.name LIKE \'%%pool%%\';')
180 return self._create_dump_from_query(query)
181
182
183 def create_shards_dump(self):
184 """Dumps shards and their labels into a text file.
185
186 @return: The path to a tempfile containing a dump of
187 shards and their labels.
188 """
189 query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
190 'JOIN afe_shards_labels '
191 'ON shards.id = afe_shards_labels.shard_id '
192 'JOIN afe_labels AS labels '
193 'ON labels.id = afe_shards_labels.label_id;')
194 return self._create_dump_from_query(query)
Prashanth B064c9492014-09-06 15:29:32 -0700195
196
197 def dump(self):
198 """Creates a data dump based on the type of schedule.
199
200 @return: The path to a file containing the dump.
201 """
202 if self._type == _ONLY_HOSTS:
203 return self.create_host_dump()
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700204 if self._type == _ONLY_SHARDS:
205 return self.create_shards_dump()
Prashanth B064c9492014-09-06 15:29:32 -0700206 return self.create_mysql_dump()
207
208
209 def _get_name(self):
210 """Returns the name of the dump as presented to google storage."""
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700211 if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
212 file_type = 'txt'
213 else:
214 file_type = 'gz'
Prashanth B064c9492014-09-06 15:29:32 -0700215 return 'autotest-dump.%s.%s' % (
216 datetime.datetime.now().strftime('%y.%m.%d'), file_type)
Chris Sosa621509d2012-04-04 16:02:52 -0700217
218
219 @staticmethod
220 def _retry_run(cmd):
221 """Run the specified |cmd| string, retrying if necessary.
222
223 Args:
224 cmd: The command to run.
225 """
226 for attempt in range(_ATTEMPTS):
227 try:
228 return utils.system_output(cmd)
229 except error.CmdError:
230 if attempt == _ATTEMPTS - 1:
231 raise
232 else:
233 logging.error('Failed to run %r', cmd)
234
235
236 def upload_to_google_storage(self, dump_file):
Prashanth B064c9492014-09-06 15:29:32 -0700237 """Uploads the given |dump_file| to Google Storage.
238
239 @param dump_file: The path to the file containing the dump.
240 """
Chris Sosa621509d2012-04-04 16:02:52 -0700241 cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
242 input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
243 name=self._get_name(), gs_dir=self._gs_dir)
244 cmd = cmd % input_dict
245 logging.debug('Uploading mysql dump to google storage')
246 self._retry_run(cmd)
247 os.remove(dump_file)
248
249
250 def _get_gs_command(self, cmd):
251 """Returns an array representing the command for rm or ls."""
252 # Helpful code to allow us to test without gs.
253 assert cmd in ['rm', 'ls']
254 gs_bin = _GSUTIL_BIN
Scott Zawalski6a6c9d12012-04-16 13:03:07 -0400255 if self._gs_dir.startswith('gs://'):
256 cmd_array = [gs_bin, cmd]
257 else:
258 cmd_array = [cmd]
Chris Sosa621509d2012-04-04 16:02:52 -0700259
260 return cmd_array
261
262
263 def _do_ls(self):
264 """Returns the output of running ls on the gs bucket."""
265 cmd = self._get_gs_command('ls') + [self._gs_dir]
266 return self._retry_run(' '.join(cmd))
267
268
269 def cleanup(self):
270 """Cleans up the gs bucket to ensure we don't over archive."""
271 logging.debug('Cleaning up previously archived dump files.')
272 listing = self._do_ls()
273 ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
274 if len(ordered_listing) < self._number_to_keep:
275 logging.debug('Cleanup found nothing to do.')
276 return
277
278 to_remove = ordered_listing[:-self._number_to_keep]
279 rm_cmd = self._get_gs_command('rm')
280 for artifact in to_remove:
Shuqian Zhao3fc87b72017-09-28 15:40:28 -0700281 cmd = ' '.join(rm_cmd + [artifact])
Chris Sosa621509d2012-04-04 16:02:52 -0700282 self._retry_run(cmd)
283
284
285def parse_options():
286 """Parses given options."""
287 parser = optparse.OptionParser()
288 parser.add_option('--gs_bucket', default=_GS_BUCKET,
289 help='Google storage bucket to store mysql db dumps.')
290 parser.add_option('--keep', default=10, type=int,
291 help='Number of dumps to keep of specified type.')
292 parser.add_option('--type', default=_DAILY,
293 help='The type of mysql dump to store.')
294 parser.add_option('--verbose', default=False, action='store_true',
295 help='Google storage bucket to store mysql db dumps.')
296 options = parser.parse_args()[0]
297 if options.type not in _SCHEDULER_TYPES:
298 parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))
299
300 return options
301
302
303def main():
304 """Runs the program."""
305 options = parse_options()
306 logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
307 verbose=options.verbose)
Shuqian Zhao3fc87b72017-09-28 15:40:28 -0700308 backup_succeeded = False
309
310 with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup',
311 indirect=True):
312 with metrics.SecondsTimer(
313 'chromeos/autotest/afe_db/backup/durations',
314 fields={'type': options.type}):
315 try:
316 logging.debug('Start db backup: %s', options.type)
317 archiver = MySqlArchiver(
318 options.type, options.keep, options.gs_bucket)
319 dump_file = archiver.dump()
320 logging.debug('Uploading backup: %s', options.type)
321 archiver.upload_to_google_storage(dump_file)
322 archiver.cleanup()
323 logging.debug('Db backup completed: %s', options.type)
324 backup_succeeded = True
325 finally:
326 metrics.Counter(
327 'chromeos/autotest/db/db_backup/completed').increment(
328 fields={'success': backup_succeeded,
329 'type': options.type})
Chris Sosa621509d2012-04-04 16:02:52 -0700330
331
332if __name__ == '__main__':
333 main()