blob: c74fb3db9aac6ade13afbdee45e80b2366d5ba34 [file] [log] [blame]
Chris Sosa621509d2012-04-04 16:02:52 -07001#!/usr/bin/python
2
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module used to back up the mysql db and upload to Google Storage.
8
9Usage:
10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
11
12 gs_bucket may refer to a local location by omitting gs:// and giving a local
13 path if desired for testing. The example usage above creates a dump
14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
15 cleans up older dumps if there are more than 10 in that directory.
16"""
17
18import datetime
19from distutils import version
20import logging
21import optparse
22import os
23import tempfile
24
25import common
26
27from autotest_lib.client.common_lib import error
28from autotest_lib.client.common_lib import global_config, logging_manager, utils
29from autotest_lib.utils import test_importer
30
31
32_ATTEMPTS = 3
33_GSUTIL_BIN = 'gsutil'
Scott Zawalski6a6c9d12012-04-16 13:03:07 -040034_GS_BUCKET = 'gs://chromeos-lab/backup/database'
Scott Zawalski6ccf2842012-08-31 20:45:43 -040035# TODO(scottz): Should we need to ignore more than one database a general
36# function should be designed that lists tables in the database and properly
37# creates the --ignore-table= args to be passed to mysqldump.
38# Tables to ignore when dumping all databases.
39# performance_schema is an internal database that cannot be dumped
40IGNORE_TABLES = ['performance_schema.cond_instances',
41 'performance_schema.events_waits_current',
42 'performance_schema.cond_instances',
43 'performance_schema.events_waits_history',
44 'performance_schema.events_waits_history_long',
45 'performance_schema.events_waits_summary_by_instance',
46 ('performance_schema.'
47 'events_waits_summary_by_thread_by_event_name'),
48 'performance_schema.events_waits_summary_global_by_event_name',
49 'performance_schema.file_instances',
50 'performance_schema.file_summary_by_event_name',
51 'performance_schema.file_summary_by_instance',
52 'performance_schema.mutex_instances',
53 'performance_schema.performance_timers',
54 'performance_schema.rwlock_instances',
55 'performance_schema.setup_consumers',
56 'performance_schema.setup_instruments',
57 'performance_schema.setup_timers',
58 'performance_schema.threads']
Prashanth B064c9492014-09-06 15:29:32 -070059
60# Conventional mysqldump schedules.
Chris Sosa621509d2012-04-04 16:02:52 -070061_DAILY = 'daily'
62_WEEKLY = 'weekly'
63_MONTHLY = 'monthly'
Prashanth B064c9492014-09-06 15:29:32 -070064
Fang Deng4e6ba492015-12-11 15:44:04 -080065# Back up server db
66_SERVER_DB = 'server_db'
67
Prashanth B064c9492014-09-06 15:29:32 -070068# Contrary to a conventional mysql dump which takes O(hours) on large databases,
69# a host dump is the cheapest form of backup possible. We dump the output of a
70# of a mysql command showing all hosts and their pool labels to a text file that
71# is backed up to google storage.
72_ONLY_HOSTS = 'only_hosts'
Jakob Jueliche0e9c932014-09-24 14:40:48 -070073_ONLY_SHARDS = 'only_shards'
Fang Deng4e6ba492015-12-11 15:44:04 -080074_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY]
75
76class BackupError(Exception):
77 """Raised for error occurred during backup."""
Chris Sosa621509d2012-04-04 16:02:52 -070078
79
80class MySqlArchiver(object):
81 """Class that archives the Autotest MySQL DB to Google Storage.
82
83 Vars:
84 gs_dir: The path to the directory in Google Storage that this dump file
85 will be uploaded to.
86 number_to_keep: The number of dumps we should store.
87 """
Fang Deng4e6ba492015-12-11 15:44:04 -080088 _AUTOTEST_DB = "chromeos_autotest_db"
89 _SERVER_DB = "chromeos_lab_servers"
Chris Sosa621509d2012-04-04 16:02:52 -070090
91
92 def __init__(self, scheduled_type, number_to_keep, gs_bucket):
Fang Deng4e6ba492015-12-11 15:44:04 -080093 # For conventional scheduled type, we back up all databases.
94 # self._db is only used when scheduled_type is not
95 # conventional scheduled type.
96 self._db = self._get_db_name(scheduled_type)
Chris Sosa621509d2012-04-04 16:02:52 -070097 self._gs_dir = '/'.join([gs_bucket, scheduled_type])
98 self._number_to_keep = number_to_keep
Prashanth B064c9492014-09-06 15:29:32 -070099 self._type = scheduled_type
Chris Sosa621509d2012-04-04 16:02:52 -0700100
101
Fang Deng4e6ba492015-12-11 15:44:04 -0800102 @classmethod
103 def _get_db_name(cls, scheduled_type):
104 """Get the db name to backup.
105
106 @param scheduled_type: one of _SCHEDULER_TYPES.
107
108 @returns: The name of the db to backup.
109 Or None for backup all dbs.
110 """
111 if scheduled_type == _SERVER_DB:
112 return cls._SERVER_DB
113 elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
114 return cls._AUTOTEST_DB
115 else:
116 return None
117
Chris Sosa621509d2012-04-04 16:02:52 -0700118 @staticmethod
119 def _get_user_pass():
120 """Returns a tuple containing the user/pass to use to access the DB."""
121 user = global_config.global_config.get_config_value(
122 'CROS', 'db_backup_user')
123 password = global_config.global_config.get_config_value(
124 'CROS', 'db_backup_password')
125 return user, password
126
127
128 def create_mysql_dump(self):
129 """Returns the path to a mysql dump of the current autotest DB."""
130 user, password = self._get_user_pass()
131 _, filename = tempfile.mkstemp('autotest_db_dump')
132 logging.debug('Dumping mysql database to file %s', filename)
Scott Zawalski6ccf2842012-08-31 20:45:43 -0400133 extra_dump_args = ''
134 for entry in IGNORE_TABLES:
135 extra_dump_args += '--ignore-table=%s ' % entry
136
Fang Deng4e6ba492015-12-11 15:44:04 -0800137 if not self._db:
138 extra_dump_args += "--all-databases"
139 db_name = self._db or ''
140 utils.system('set -o pipefail; mysqldump --user=%s '
141 '--password=%s %s %s| gzip - > %s' % (
142 user, password, extra_dump_args, db_name, filename))
Chris Sosa621509d2012-04-04 16:02:52 -0700143 return filename
144
145
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700146 def _create_dump_from_query(self, query):
147 """Dumps result of a query into a text file.
148
149 @param query: Query to execute.
150
151 @return: The path to a tempfile containing the response of the query.
152 """
Fang Deng4e6ba492015-12-11 15:44:04 -0800153 if not self._db:
154 raise BackupError("_create_dump_from_query requires a specific db.")
155 parameters = {'db': self._db, 'query': query}
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700156 parameters['user'], parameters['password'] = self._get_user_pass()
157 _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
158 utils.system(
159 'set -o pipefail; mysql -u %(user)s -p%(password)s '
Fang Deng4e6ba492015-12-11 15:44:04 -0800160 '%(db)s -e "%(query)s" > %(filename)s' %
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700161 parameters)
162 return parameters['filename']
163
164
Prashanth B064c9492014-09-06 15:29:32 -0700165 def create_host_dump(self):
166 """Dumps hosts and their labels into a text file.
167
168 @return: The path to a tempfile containing a dump of
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700169 hosts and their pool labels.
Prashanth B064c9492014-09-06 15:29:32 -0700170 """
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700171 query = ('SELECT hostname, labels.name FROM afe_hosts AS hosts '
172 'JOIN afe_hosts_labels ON hosts.id = afe_hosts_labels.host_id '
173 'JOIN afe_labels AS labels '
174 'ON labels.id = afe_hosts_labels.label_id '
175 'WHERE labels.name LIKE \'%%pool%%\';')
176 return self._create_dump_from_query(query)
177
178
179 def create_shards_dump(self):
180 """Dumps shards and their labels into a text file.
181
182 @return: The path to a tempfile containing a dump of
183 shards and their labels.
184 """
185 query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
186 'JOIN afe_shards_labels '
187 'ON shards.id = afe_shards_labels.shard_id '
188 'JOIN afe_labels AS labels '
189 'ON labels.id = afe_shards_labels.label_id;')
190 return self._create_dump_from_query(query)
Prashanth B064c9492014-09-06 15:29:32 -0700191
192
193 def dump(self):
194 """Creates a data dump based on the type of schedule.
195
196 @return: The path to a file containing the dump.
197 """
198 if self._type == _ONLY_HOSTS:
199 return self.create_host_dump()
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700200 if self._type == _ONLY_SHARDS:
201 return self.create_shards_dump()
Prashanth B064c9492014-09-06 15:29:32 -0700202 return self.create_mysql_dump()
203
204
205 def _get_name(self):
206 """Returns the name of the dump as presented to google storage."""
Jakob Jueliche0e9c932014-09-24 14:40:48 -0700207 if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
208 file_type = 'txt'
209 else:
210 file_type = 'gz'
Prashanth B064c9492014-09-06 15:29:32 -0700211 return 'autotest-dump.%s.%s' % (
212 datetime.datetime.now().strftime('%y.%m.%d'), file_type)
Chris Sosa621509d2012-04-04 16:02:52 -0700213
214
215 @staticmethod
216 def _retry_run(cmd):
217 """Run the specified |cmd| string, retrying if necessary.
218
219 Args:
220 cmd: The command to run.
221 """
222 for attempt in range(_ATTEMPTS):
223 try:
224 return utils.system_output(cmd)
225 except error.CmdError:
226 if attempt == _ATTEMPTS - 1:
227 raise
228 else:
229 logging.error('Failed to run %r', cmd)
230
231
232 def upload_to_google_storage(self, dump_file):
Prashanth B064c9492014-09-06 15:29:32 -0700233 """Uploads the given |dump_file| to Google Storage.
234
235 @param dump_file: The path to the file containing the dump.
236 """
Chris Sosa621509d2012-04-04 16:02:52 -0700237 cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
238 input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
239 name=self._get_name(), gs_dir=self._gs_dir)
240 cmd = cmd % input_dict
241 logging.debug('Uploading mysql dump to google storage')
242 self._retry_run(cmd)
243 os.remove(dump_file)
244
245
246 def _get_gs_command(self, cmd):
247 """Returns an array representing the command for rm or ls."""
248 # Helpful code to allow us to test without gs.
249 assert cmd in ['rm', 'ls']
250 gs_bin = _GSUTIL_BIN
Scott Zawalski6a6c9d12012-04-16 13:03:07 -0400251 if self._gs_dir.startswith('gs://'):
252 cmd_array = [gs_bin, cmd]
253 else:
254 cmd_array = [cmd]
Chris Sosa621509d2012-04-04 16:02:52 -0700255
256 return cmd_array
257
258
259 def _do_ls(self):
260 """Returns the output of running ls on the gs bucket."""
261 cmd = self._get_gs_command('ls') + [self._gs_dir]
262 return self._retry_run(' '.join(cmd))
263
264
265 def cleanup(self):
266 """Cleans up the gs bucket to ensure we don't over archive."""
267 logging.debug('Cleaning up previously archived dump files.')
268 listing = self._do_ls()
269 ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
270 if len(ordered_listing) < self._number_to_keep:
271 logging.debug('Cleanup found nothing to do.')
272 return
273
274 to_remove = ordered_listing[:-self._number_to_keep]
275 rm_cmd = self._get_gs_command('rm')
276 for artifact in to_remove:
277 cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact])
278 self._retry_run(cmd)
279
280
281def parse_options():
282 """Parses given options."""
283 parser = optparse.OptionParser()
284 parser.add_option('--gs_bucket', default=_GS_BUCKET,
285 help='Google storage bucket to store mysql db dumps.')
286 parser.add_option('--keep', default=10, type=int,
287 help='Number of dumps to keep of specified type.')
288 parser.add_option('--type', default=_DAILY,
289 help='The type of mysql dump to store.')
290 parser.add_option('--verbose', default=False, action='store_true',
291 help='Google storage bucket to store mysql db dumps.')
292 options = parser.parse_args()[0]
293 if options.type not in _SCHEDULER_TYPES:
294 parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))
295
296 return options
297
298
299def main():
300 """Runs the program."""
301 options = parse_options()
302 logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
303 verbose=options.verbose)
Fang Deng1cbfe7c2015-05-29 16:04:33 -0700304 logging.debug('Start db backup: %s', options.type)
Chris Sosa621509d2012-04-04 16:02:52 -0700305 archiver = MySqlArchiver(options.type, options.keep, options.gs_bucket)
Prashanth B064c9492014-09-06 15:29:32 -0700306 dump_file = archiver.dump()
Fang Deng1cbfe7c2015-05-29 16:04:33 -0700307 logging.debug('Uploading backup: %s', options.type)
Chris Sosa621509d2012-04-04 16:02:52 -0700308 archiver.upload_to_google_storage(dump_file)
309 archiver.cleanup()
Fang Deng1cbfe7c2015-05-29 16:04:33 -0700310 logging.debug('Db backup completed: %s', options.type)
Chris Sosa621509d2012-04-04 16:02:52 -0700311
312
313if __name__ == '__main__':
314 main()