#!/usr/bin/python

# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Module used to back up the mysql db and upload to Google Storage.

Usage:
  backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10

  gs_bucket may refer to a local location by omitting gs:// and giving a local
  path if desired for testing. The example usage above creates a dump
  of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
  cleans up older dumps if there are more than 10 in that directory.
"""

import datetime
from distutils import version
import logging
import optparse
import os
import tempfile

import common

from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config, logging_manager, utils
from autotest_lib.utils import test_importer


_ATTEMPTS = 3
_GSUTIL_BIN = 'gsutil'
_GS_BUCKET = 'gs://chromeos-lab/backup/database'
# TODO(scottz): Should we need to ignore more than one database, a general
# function should be designed that lists tables in the database and properly
# creates the --ignore-table= args to be passed to mysqldump.
# Tables to ignore when dumping all databases.
# performance_schema is an internal database that cannot be dumped.
IGNORE_TABLES = ['performance_schema.cond_instances',
                 'performance_schema.events_waits_current',
                 'performance_schema.events_waits_history',
                 'performance_schema.events_waits_history_long',
                 'performance_schema.events_waits_summary_by_instance',
                 ('performance_schema.'
                  'events_waits_summary_by_thread_by_event_name'),
                 'performance_schema.events_waits_summary_global_by_event_name',
                 'performance_schema.file_instances',
                 'performance_schema.file_summary_by_event_name',
                 'performance_schema.file_summary_by_instance',
                 'performance_schema.mutex_instances',
                 'performance_schema.performance_timers',
                 'performance_schema.rwlock_instances',
                 'performance_schema.setup_consumers',
                 'performance_schema.setup_instruments',
                 'performance_schema.setup_timers',
                 'performance_schema.threads']

# Conventional mysqldump schedules.
_DAILY = 'daily'
_WEEKLY = 'weekly'
_MONTHLY = 'monthly'

# Contrary to a conventional mysql dump, which takes O(hours) on large
# databases, a host dump is the cheapest form of backup possible. We dump the
# output of a mysql command showing all hosts and their pool labels to a text
# file that is backed up to google storage.
_ONLY_HOSTS = 'only_hosts'
_ONLY_SHARDS = 'only_shards'
_SCHEDULER_TYPES = [_ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY]


class MySqlArchiver(object):
    """Class that archives the Autotest MySQL DB to Google Storage.

    Vars:
      gs_dir: The path to the directory in Google Storage that this dump file
          will be uploaded to.
      number_to_keep: The number of dumps we should store.
    """


    def __init__(self, scheduled_type, number_to_keep, gs_bucket):
        self._gs_dir = '/'.join([gs_bucket, scheduled_type])
        self._number_to_keep = number_to_keep
        self._type = scheduled_type


    @staticmethod
    def _get_user_pass():
        """Returns a tuple containing the user/pass to use to access the DB."""
        user = global_config.global_config.get_config_value(
                'CROS', 'db_backup_user')
        password = global_config.global_config.get_config_value(
                'CROS', 'db_backup_password')
        return user, password


    def create_mysql_dump(self):
        """Returns the path to a mysql dump of the current autotest DB."""
        user, password = self._get_user_pass()
        _, filename = tempfile.mkstemp('autotest_db_dump')
        logging.debug('Dumping mysql database to file %s', filename)
        extra_dump_args = ''
        for entry in IGNORE_TABLES:
            extra_dump_args += '--ignore-table=%s ' % entry

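        # 'set -o pipefail' makes the shell report a failure if mysqldump
        # fails even though gzip (the last command in the pipeline) succeeds.
        # The dump covers all databases except the ignored performance_schema
        # tables and is gzip-compressed into the temp file.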
        utils.system('set -o pipefail; mysqldump --all-databases --user=%s '
                     '--password=%s %s | gzip - > %s' % (user, password,
                                                         extra_dump_args,
                                                         filename))
        return filename


    def _create_dump_from_query(self, query):
        """Dumps result of a query into a text file.

        @param query: Query to execute.

        @return: The path to a tempfile containing the response of the query.
        """
        parameters = {'query': query}
        parameters['user'], parameters['password'] = self._get_user_pass()
        _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
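        # The query runs against the chromeos_autotest_db database; mysql
        # expects the password to immediately follow -p with no space.
        # Query results are redirected into the temp file as plain text.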
        utils.system(
                'set -o pipefail; mysql -u %(user)s -p%(password)s '
                'chromeos_autotest_db -e "%(query)s" > %(filename)s' %
                parameters)
        return parameters['filename']


    def create_host_dump(self):
        """Dumps hosts and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 hosts and their pool labels.
        """
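        # Joins every host to each of its labels and keeps only labels whose
        # name contains 'pool', so the dump lists hostname / pool-label pairs.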
        query = ('SELECT hostname, labels.name FROM afe_hosts AS hosts '
                 'JOIN afe_hosts_labels ON hosts.id = afe_hosts_labels.host_id '
                 'JOIN afe_labels AS labels '
                 'ON labels.id = afe_hosts_labels.label_id '
                 'WHERE labels.name LIKE \'%%pool%%\';')
        return self._create_dump_from_query(query)


    def create_shards_dump(self):
        """Dumps shards and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 shards and their labels.
        """
        query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
                 'JOIN afe_shards_labels '
                 'ON shards.id = afe_shards_labels.shard_id '
                 'JOIN afe_labels AS labels '
                 'ON labels.id = afe_shards_labels.label_id;')
        return self._create_dump_from_query(query)


    def dump(self):
        """Creates a data dump based on the type of schedule.

        @return: The path to a file containing the dump.
        """
        if self._type == _ONLY_HOSTS:
            return self.create_host_dump()
        if self._type == _ONLY_SHARDS:
            return self.create_shards_dump()
        return self.create_mysql_dump()


    def _get_name(self):
        """Returns the name of the dump as presented to google storage."""
        if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
            file_type = 'txt'
        else:
            file_type = 'gz'
        return 'autotest-dump.%s.%s' % (
                datetime.datetime.now().strftime('%y.%m.%d'), file_type)


    @staticmethod
    def _retry_run(cmd):
        """Run the specified |cmd| string, retrying if necessary.

        Args:
            cmd: The command to run.
        """
        for attempt in range(_ATTEMPTS):
            try:
                return utils.system_output(cmd)
            except error.CmdError:
                if attempt == _ATTEMPTS - 1:
                    raise
                else:
                    logging.error('Failed to run %r', cmd)


    def upload_to_google_storage(self, dump_file):
        """Uploads the given |dump_file| to Google Storage.

        @param dump_file: The path to the file containing the dump.
        """
        cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
        input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
                          name=self._get_name(), gs_dir=self._gs_dir)
        cmd = cmd % input_dict
        logging.debug('Uploading mysql dump to google storage')
        self._retry_run(cmd)
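        # Only remove the local dump once the upload has succeeded
        # (_retry_run raises if every attempt failed).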
        os.remove(dump_file)


    def _get_gs_command(self, cmd):
        """Returns an array representing the command for rm or ls."""
        # Helpful code to allow us to test without gs.
        assert cmd in ['rm', 'ls']
        gs_bin = _GSUTIL_BIN
        if self._gs_dir.startswith('gs://'):
            cmd_array = [gs_bin, cmd]
        else:
            cmd_array = [cmd]

        return cmd_array


    def _do_ls(self):
        """Returns the output of running ls on the gs bucket."""
        cmd = self._get_gs_command('ls') + [self._gs_dir]
        return self._retry_run(' '.join(cmd))


    def cleanup(self):
        """Cleans up the gs bucket to ensure we don't over archive."""
        logging.debug('Cleaning up previously archived dump files.')
        listing = self._do_ls()
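        # Dump names embed the date, so LooseVersion ordering puts the oldest
        # dumps first; everything except the newest number_to_keep is removed.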
        ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
        if len(ordered_listing) < self._number_to_keep:
            logging.debug('Cleanup found nothing to do.')
            return

        to_remove = ordered_listing[:-self._number_to_keep]
        rm_cmd = self._get_gs_command('rm')
        for artifact in to_remove:
            cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact])
            self._retry_run(cmd)


def parse_options():
    """Parses given options."""
    parser = optparse.OptionParser()
    parser.add_option('--gs_bucket', default=_GS_BUCKET,
                      help='Google storage bucket to store mysql db dumps.')
    parser.add_option('--keep', default=10, type=int,
                      help='Number of dumps to keep of specified type.')
    parser.add_option('--type', default=_DAILY,
                      help='The type of mysql dump to store.')
    parser.add_option('--verbose', default=False, action='store_true',
                      help='Enable verbose (debug) logging.')
    options = parser.parse_args()[0]
    if options.type not in _SCHEDULER_TYPES:
        parser.error('Type must be one of: %s.' % ', '.join(_SCHEDULER_TYPES))

    return options


def main():
    """Runs the program."""
    options = parse_options()
    logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(),
                                      verbose=options.verbose)
    logging.debug('Start db backup: %s', options.type)
    archiver = MySqlArchiver(options.type, options.keep, options.gs_bucket)
    dump_file = archiver.dump()
    logging.debug('Uploading backup: %s', options.type)
    archiver.upload_to_google_storage(dump_file)
    archiver.cleanup()
    logging.debug('Db backup completed: %s', options.type)


if __name__ == '__main__':
    main()